From 87569f2bab0824e8de6ea64a057fb76f4ddb4f0b Mon Sep 17 00:00:00 2001 From: wangqijian <221900059@smail.nju.edu.cn> Date: Thu, 16 Jan 2025 12:08:31 +0000 Subject: [PATCH 01/18] vllm_test.py in ding/worker --- ding/worker/vllm_test_wqj.py | 222 +++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 ding/worker/vllm_test_wqj.py diff --git a/ding/worker/vllm_test_wqj.py b/ding/worker/vllm_test_wqj.py new file mode 100644 index 0000000000..de80dc7da1 --- /dev/null +++ b/ding/worker/vllm_test_wqj.py @@ -0,0 +1,222 @@ +from typing import List, Tuple +import os +import uuid +from loguru import logger +from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput +from vllm.assets.image import ImageAsset + +class VllmActor: + def __init__(self, model_path: str) -> None: + """ + Overview: + Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. + Arguments: + - model_path (str): The path to the language model. + """ + self.free_gpus = self.get_free_gpus() + self.num_gpus = len(self.free_gpus) + assert self.num_gpus > 0, "No GPUs found" + # Set CUDA_VISIBLE_DEVICES to use only free GPUs + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus)) + self.model_path = model_path + self._initialize() + + def get_free_gpus(self) -> List[int]: + """ + Overview: + Get IDs of GPUs with free memory. + Returns: + - List[int]: The IDs of the free GPUs. + """ + try: + # Get GPU memory usage using nvidia-smi + gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader').readlines() + free_gpus = [] + + for gpu_id, stats in enumerate(gpu_stats): + mem_used, mem_total = map(int, stats.strip().split(',')) + # Consider GPU as free if less than 5% memory is used + if mem_used / mem_total < 0.05: + free_gpus.append(gpu_id) + + return free_gpus if free_gpus else [0] # Default to GPU 0 if no free GPUs found + except Exception: + logger.warning("Failed to get GPU stats, defaulting to GPU 0") + return [0] + + def _initialize(self) -> None: + """ + Overview: + Initialize the vLLM actor with a series of arguments. + """ + logger.info("Initializing vLLM") + # TODO: Try other options in https://docs.vllm.ai/en/stable/models/engine_args.html#engine-args. + engine_args = AsyncEngineArgs( + model=self.model_path, + tensor_parallel_size=self.num_gpus, + max_num_batched_tokens=8192, + max_model_len=8192, + #max_model_len=4096, #see if 8192 works + #max_num_batched_tokens=4096, + #max_num_batched_tokens=2048, + #max_model_len=2048, + # enable_chunked_prefill=True, + max_num_seqs=5, + # Note - mm_processor_kwargs can also be passed to generate/chat calls + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + }, + ) + self.engine = AsyncLLMEngine.from_engine_args(engine_args) + + async def generate(self, prompt, num_samples: int, max_tokens: int, temperature: float = 0) -> RequestOutput: + """ + Overview: + Generate tactics for the current state. + Arguments: + - prompt : The prompt to generate tactics. + - num_samples (int): The number of tactics to generate. + - max_tokens (int): The maximum number of tokens to generate. + - temperature (float): The temperature for the language model, default to 0. + Returns: + - RequestOutput: The generated tactics and their log-probabilities. + """ + sampling_params = SamplingParams( + n=num_samples, + max_tokens=max_tokens, + temperature=temperature, + ) + + # Using async iterator to handle vLLM's generation process + # 1. 
vLLM's generate method is asynchronous to prevent blocking while waiting for model outputs + # 2. async for allows streaming the generated outputs incrementally instead of waiting for all results + # 3. This approach is particularly suitable for LLM inference which can be time-consuming + # 4. The request_id ensures unique identification for each generation request + async for oup in self.engine.generate( + prompt, sampling_params, request_id=str(uuid.uuid4().hex) + ): + final_output = oup + return final_output + + +class HuggingFaceModelGenerator: + """ + Overview: + A LLM/VLM generator that uses Hugging Face models with vLLM as the backend. + """ + + def __init__(self, model_path: str, max_tokens: int = 1024, temperature: float = 0) -> None: + """ + Overview: + Initialize the Hugging Face model generator. + Arguments: + - model_path (str): The path to the language model. + - max_tokens (int): The maximum number of tokens to generate, default to 1024. + - temperature (float): The temperature for the language model, default to 0. + """ + self.vllm_actor = VllmActor(model_path) + self.max_tokens = max_tokens + self.temperature = temperature + + async def generate( + self, + prompt, + num_samples: int, + ) -> List[Tuple[str, float]]: + """ + Overview: + Generate tactics for the current state. + Arguments: + - prompt : The prompt to generate tactics. + - num_samples (int): The number of tactics to generate. + Returns: + - List[Tuple[str, float]]: The generated tactics and their log-probabilities. + + .. note:: + This method is asynchronous and returns a coroutine. + """ + response = await self.vllm_actor.generate(prompt, num_samples, self.max_tokens, self.temperature) + # Use raw logprobs as confidence scores + confidence_scores = [x.cumulative_logprob for x in response.outputs] + return [ + (x.text.strip(), conf) + for x, conf in zip(response.outputs, confidence_scores) + ] + + +model=HuggingFaceModelGenerator('/mnt/afs/share/Qwen2-VL-7B',temperature=0.5) #设置一个temperature就好了,可以做到生成多个候选答案 + +def get_prompts_qwen(questions: list,modality: str): + if modality == "image": + placeholder = "<|image_pad|>" + elif modality == "video": + placeholder = "<|video_pad|>" + + prompts = [("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n") for question in questions] + stop_token_ids = None + return prompts,stop_token_ids + +def get_multi_modal_input(modality,filenames,questions): + """ + return { + "data": image or video, + "question": question, + } + """ + if modality == "image": + # Input image and question + ret={} + ret["data"]=[] + ret["question"]=[] + for filename,question in zip(filenames,questions): + image = ImageAsset(filename) \ + .pil_image.convert("RGB") + #img_question = "What is the content of this image?" + img_question=question + ret["data"].append(image) + ret["question"].append(img_question) + return ret + + +questions=["What is the content of this image?","Please describe the image.","How many people are there in the image? 
What are they doing?"] +img_names=['/mnt/afs/niuyazhe/data/meme/data/Eimages/Eimages/Eimages/image_ (2)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3127)','/mnt/afs/wangqijian/data/test/test'] +#questions=["What is the content of this image?"] +#img_names=['/mnt/afs/niuyazhe/data/meme/data/Eimages/Eimages/Eimages/image_ (2)'] +num_prompts=len(questions) +image_repeat_prob=None +modality = 'image' + +mm_input = get_multi_modal_input(modality,img_names,questions) +data = mm_input["data"] +question = mm_input["question"] +batch_inference_mine=True +prompts, stop_token_ids = get_prompts_qwen(question,modality) + + +import asyncio +import nest_asyncio +nest_asyncio.apply() +async def main(): + inputs = [ + { + "prompt":prompt, + "multi_modal_data":{ + modality:data + } + } for prompt,data in zip(prompts,data) + ] + # 调用 generate 方法 + for in_data in inputs: + tactics = await model.generate(prompt=in_data, num_samples=3) + # 打印返回结果 + for tactic, confidence in tactics: + print(f"Tactic: {tactic}") + + +# 运行主程序 +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file From ffd435b19b89f2d958cdd50b8195e8f3575a8410 Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Tue, 21 Jan 2025 05:51:23 +0000 Subject: [PATCH 02/18] add vllm_collector and test_vllm --- .../collector/tests/test_vllm_collector.py | 96 ++++++++++++++++++ .../vllm_collector.py} | 97 ++----------------- 2 files changed, 105 insertions(+), 88 deletions(-) create mode 100644 ding/worker/collector/tests/test_vllm_collector.py rename ding/worker/{vllm_test_wqj.py => collector/vllm_collector.py} (64%) diff --git a/ding/worker/collector/tests/test_vllm_collector.py b/ding/worker/collector/tests/test_vllm_collector.py new file mode 100644 index 0000000000..2b2795e00f --- /dev/null +++ b/ding/worker/collector/tests/test_vllm_collector.py @@ -0,0 +1,96 @@ +from typing import List, Tuple +import os +import uuid +from loguru import logger +from ..vllm_collector import HuggingFaceModelGenerator +from vllm.assets.image import ImageAsset + +# set a temperature > 0 to get multiple responses +# note that HuggingFaceModelGenerator has a parameter "mm_processor_kwargs" which is set to align with the settings of Qwen in default +model=HuggingFaceModelGenerator('/mnt/afs/share/Qwen2-VL-7B',temperature=0.5) + +def get_prompts_qwen(questions: list,modality: str): + if modality == "image": + placeholder = "<|image_pad|>" + elif modality == "video": + placeholder = "<|video_pad|>" + else : + msg = f"Modality {modality} is not supported." + raise ValueError(msg) + + prompts = [("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n") for question in questions] + stop_token_ids = None + return prompts,stop_token_ids + +def get_multi_modal_input(modality:str ,filenames:list, questions:list): + """ + return { + "data": image or video, + "question": question, + } + """ + if modality == "image": + # Input image and question + ret={ + 'data':[], + 'question':[] + } + for filename,question in zip(filenames,questions): + image = ImageAsset(filename) \ + .pil_image.convert("RGB") + #img_question = "What is the content of this image?" + img_question=question + ret["data"].append(image) + ret["question"].append(img_question) + else: + msg = f"Modality {modality} is not supported." 
+ raise ValueError(msg) + return ret + + +questions=["What is the content of this image?","Please describe the image.","How many people are there in the image? What are they doing?"] +img_names=['/mnt/afs/niuyazhe/data/meme/data/Eimages/Eimages/Eimages/image_ (2)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3127)','/mnt/afs/wangqijian/data/test/test'] + +num_prompts=len(questions) +image_repeat_prob=None +from enum import Enum + +class Modality(Enum): + IMAGE = 'image' + TEXT = 'text' + VIDEO = 'video' + +modality=Modality.IMAGE.value + +mm_input = get_multi_modal_input(modality,img_names,questions) +data = mm_input["data"] +question = mm_input["question"] +prompts, stop_token_ids = get_prompts_qwen(question,modality) + + +import asyncio +import nest_asyncio +nest_asyncio.apply() +async def main(): + inputs = [ + { + "prompt":prompt, + "multi_modal_data":{ + modality:data + } + } for prompt,data in zip(prompts,data) + ] + # generate responses + for in_data in inputs: + responses = await model.generate(prompt=in_data, num_samples=3) + # print response + for response, confidence in responses: + print(f"Response: {response}") + + +# run main +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/ding/worker/vllm_test_wqj.py b/ding/worker/collector/vllm_collector.py similarity index 64% rename from ding/worker/vllm_test_wqj.py rename to ding/worker/collector/vllm_collector.py index de80dc7da1..c5993c4bdf 100644 --- a/ding/worker/vllm_test_wqj.py +++ b/ding/worker/collector/vllm_collector.py @@ -3,10 +3,10 @@ import uuid from loguru import logger from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput -from vllm.assets.image import ImageAsset + class VllmActor: - def __init__(self, model_path: str) -> None: + def __init__(self, model_path: str,mm_processor_kwargs: dict) -> None: """ Overview: Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. @@ -19,6 +19,7 @@ def __init__(self, model_path: str) -> None: # Set CUDA_VISIBLE_DEVICES to use only free GPUs os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus)) self.model_path = model_path + self.mm_processor_kwargs=mm_processor_kwargs self._initialize() def get_free_gpus(self) -> List[int]: @@ -56,17 +57,10 @@ def _initialize(self) -> None: tensor_parallel_size=self.num_gpus, max_num_batched_tokens=8192, max_model_len=8192, - #max_model_len=4096, #see if 8192 works - #max_num_batched_tokens=4096, - #max_num_batched_tokens=2048, - #max_model_len=2048, # enable_chunked_prefill=True, max_num_seqs=5, # Note - mm_processor_kwargs can also be passed to generate/chat calls - mm_processor_kwargs={ - "min_pixels": 28 * 28, - "max_pixels": 1280 * 28 * 28, - }, + mm_processor_kwargs=self.mm_processor_kwargs, ) self.engine = AsyncLLMEngine.from_engine_args(engine_args) @@ -106,7 +100,10 @@ class HuggingFaceModelGenerator: A LLM/VLM generator that uses Hugging Face models with vLLM as the backend. """ - def __init__(self, model_path: str, max_tokens: int = 1024, temperature: float = 0) -> None: + def __init__(self, model_path: str, max_tokens: int = 1024, temperature: float = 0, mm_processor_kwargs:dict = { + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + }) -> None: """ Overview: Initialize the Hugging Face model generator. @@ -115,7 +112,7 @@ def __init__(self, model_path: str, max_tokens: int = 1024, temperature: float = - max_tokens (int): The maximum number of tokens to generate, default to 1024. 
- temperature (float): The temperature for the language model, default to 0. """ - self.vllm_actor = VllmActor(model_path) + self.vllm_actor = VllmActor(model_path,mm_processor_kwargs) self.max_tokens = max_tokens self.temperature = temperature @@ -144,79 +141,3 @@ async def generate( for x, conf in zip(response.outputs, confidence_scores) ] - -model=HuggingFaceModelGenerator('/mnt/afs/share/Qwen2-VL-7B',temperature=0.5) #设置一个temperature就好了,可以做到生成多个候选答案 - -def get_prompts_qwen(questions: list,modality: str): - if modality == "image": - placeholder = "<|image_pad|>" - elif modality == "video": - placeholder = "<|video_pad|>" - - prompts = [("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" - f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>" - f"{question}<|im_end|>\n" - "<|im_start|>assistant\n") for question in questions] - stop_token_ids = None - return prompts,stop_token_ids - -def get_multi_modal_input(modality,filenames,questions): - """ - return { - "data": image or video, - "question": question, - } - """ - if modality == "image": - # Input image and question - ret={} - ret["data"]=[] - ret["question"]=[] - for filename,question in zip(filenames,questions): - image = ImageAsset(filename) \ - .pil_image.convert("RGB") - #img_question = "What is the content of this image?" - img_question=question - ret["data"].append(image) - ret["question"].append(img_question) - return ret - - -questions=["What is the content of this image?","Please describe the image.","How many people are there in the image? What are they doing?"] -img_names=['/mnt/afs/niuyazhe/data/meme/data/Eimages/Eimages/Eimages/image_ (2)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3127)','/mnt/afs/wangqijian/data/test/test'] -#questions=["What is the content of this image?"] -#img_names=['/mnt/afs/niuyazhe/data/meme/data/Eimages/Eimages/Eimages/image_ (2)'] -num_prompts=len(questions) -image_repeat_prob=None -modality = 'image' - -mm_input = get_multi_modal_input(modality,img_names,questions) -data = mm_input["data"] -question = mm_input["question"] -batch_inference_mine=True -prompts, stop_token_ids = get_prompts_qwen(question,modality) - - -import asyncio -import nest_asyncio -nest_asyncio.apply() -async def main(): - inputs = [ - { - "prompt":prompt, - "multi_modal_data":{ - modality:data - } - } for prompt,data in zip(prompts,data) - ] - # 调用 generate 方法 - for in_data in inputs: - tactics = await model.generate(prompt=in_data, num_samples=3) - # 打印返回结果 - for tactic, confidence in tactics: - print(f"Tactic: {tactic}") - - -# 运行主程序 -if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file From 4d60f88406c65d123fd7d612c0309439f76d9db4 Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Tue, 21 Jan 2025 13:04:18 +0000 Subject: [PATCH 03/18] formatted vllm & test_vllm --- .../collector/tests/test_vllm_collector.py | 75 ++++++++++--------- ding/worker/collector/vllm_collector.py | 34 +++++---- 2 files changed, 59 insertions(+), 50 deletions(-) diff --git a/ding/worker/collector/tests/test_vllm_collector.py b/ding/worker/collector/tests/test_vllm_collector.py index 2b2795e00f..58a675532e 100644 --- a/ding/worker/collector/tests/test_vllm_collector.py +++ b/ding/worker/collector/tests/test_vllm_collector.py @@ -4,28 +4,34 @@ from loguru import logger from ..vllm_collector import HuggingFaceModelGenerator from vllm.assets.image import ImageAsset - + # set a temperature > 0 to get multiple responses -# note that HuggingFaceModelGenerator has a 
parameter "mm_processor_kwargs" which is set to align with the settings of Qwen in default -model=HuggingFaceModelGenerator('/mnt/afs/share/Qwen2-VL-7B',temperature=0.5) +# note that HFModelGenerator has a parameter "mm_processor_kwargs" set to align with the settings of Qwen in default +model = HuggingFaceModelGenerator('/mnt/afs/share/Qwen2-VL-7B', temperature=0.5) + -def get_prompts_qwen(questions: list,modality: str): +def get_prompts_qwen(questions: list, modality: str): if modality == "image": placeholder = "<|image_pad|>" elif modality == "video": placeholder = "<|video_pad|>" - else : + else: msg = f"Modality {modality} is not supported." raise ValueError(msg) - prompts = [("<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" - f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>" - f"{question}<|im_end|>\n" - "<|im_start|>assistant\n") for question in questions] + prompts = [ + ( + "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n" + ) for question in questions + ] stop_token_ids = None - return prompts,stop_token_ids + return prompts, stop_token_ids + -def get_multi_modal_input(modality:str ,filenames:list, questions:list): +def get_multi_modal_input(modality: str, filenames: list, questions: list): """ return { "data": image or video, @@ -34,15 +40,12 @@ def get_multi_modal_input(modality:str ,filenames:list, questions:list): """ if modality == "image": # Input image and question - ret={ - 'data':[], - 'question':[] - } - for filename,question in zip(filenames,questions): + ret = {'data': [], 'question': []} + for filename, question in zip(filenames, questions): image = ImageAsset(filename) \ .pil_image.convert("RGB") #img_question = "What is the content of this image?" - img_question=question + img_question = question ret["data"].append(image) ret["question"].append(img_question) else: @@ -51,46 +54,48 @@ def get_multi_modal_input(modality:str ,filenames:list, questions:list): return ret -questions=["What is the content of this image?","Please describe the image.","How many people are there in the image? What are they doing?"] -img_names=['/mnt/afs/niuyazhe/data/meme/data/Eimages/Eimages/Eimages/image_ (2)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3127)','/mnt/afs/wangqijian/data/test/test'] +questions = [ + "What is the content of this image?", "Please describe the image.", + "How many people are there in the image? What are they doing?" 
+] +img_names = [ + '/mnt/afs/niuyazhe/data/meme/data/Eimages/Eimages/Eimages/image_ (2)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3127)', '/mnt/afs/wangqijian/data/test/test' +] -num_prompts=len(questions) -image_repeat_prob=None +num_prompts = len(questions) +image_repeat_prob = None from enum import Enum + class Modality(Enum): IMAGE = 'image' TEXT = 'text' VIDEO = 'video' -modality=Modality.IMAGE.value -mm_input = get_multi_modal_input(modality,img_names,questions) +modality = Modality.IMAGE.value + +mm_input = get_multi_modal_input(modality, img_names, questions) data = mm_input["data"] question = mm_input["question"] -prompts, stop_token_ids = get_prompts_qwen(question,modality) - +prompts, stop_token_ids = get_prompts_qwen(question, modality) import asyncio import nest_asyncio nest_asyncio.apply() + + async def main(): - inputs = [ - { - "prompt":prompt, - "multi_modal_data":{ - modality:data - } - } for prompt,data in zip(prompts,data) - ] + inputs = [{"prompt": prompt, "multi_modal_data": {modality: data}} for prompt, data in zip(prompts, data)] # generate responses for in_data in inputs: responses = await model.generate(prompt=in_data, num_samples=3) # print response for response, confidence in responses: print(f"Response: {response}") - + # run main if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/ding/worker/collector/vllm_collector.py b/ding/worker/collector/vllm_collector.py index c5993c4bdf..3bd138587f 100644 --- a/ding/worker/collector/vllm_collector.py +++ b/ding/worker/collector/vllm_collector.py @@ -6,7 +6,8 @@ class VllmActor: - def __init__(self, model_path: str,mm_processor_kwargs: dict) -> None: + + def __init__(self, model_path: str, mm_processor_kwargs: dict) -> None: """ Overview: Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. @@ -19,7 +20,7 @@ def __init__(self, model_path: str,mm_processor_kwargs: dict) -> None: # Set CUDA_VISIBLE_DEVICES to use only free GPUs os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus)) self.model_path = model_path - self.mm_processor_kwargs=mm_processor_kwargs + self.mm_processor_kwargs = mm_processor_kwargs self._initialize() def get_free_gpus(self) -> List[int]: @@ -31,7 +32,8 @@ def get_free_gpus(self) -> List[int]: """ try: # Get GPU memory usage using nvidia-smi - gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader').readlines() + gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader')\ + .readlines() free_gpus = [] for gpu_id, stats in enumerate(gpu_stats): @@ -81,7 +83,7 @@ async def generate(self, prompt, num_samples: int, max_tokens: int, temperature: max_tokens=max_tokens, temperature=temperature, ) - + # Using async iterator to handle vLLM's generation process # 1. vLLM's generate method is asynchronous to prevent blocking while waiting for model outputs # 2. async for allows streaming the generated outputs incrementally instead of waiting for all results @@ -100,10 +102,16 @@ class HuggingFaceModelGenerator: A LLM/VLM generator that uses Hugging Face models with vLLM as the backend. 
""" - def __init__(self, model_path: str, max_tokens: int = 1024, temperature: float = 0, mm_processor_kwargs:dict = { + def __init__( + self, + model_path: str, + max_tokens: int = 1024, + temperature: float = 0, + mm_processor_kwargs: dict = { "min_pixels": 28 * 28, "max_pixels": 1280 * 28 * 28, - }) -> None: + } + ) -> None: """ Overview: Initialize the Hugging Face model generator. @@ -112,14 +120,14 @@ def __init__(self, model_path: str, max_tokens: int = 1024, temperature: float = - max_tokens (int): The maximum number of tokens to generate, default to 1024. - temperature (float): The temperature for the language model, default to 0. """ - self.vllm_actor = VllmActor(model_path,mm_processor_kwargs) + self.vllm_actor = VllmActor(model_path, mm_processor_kwargs) self.max_tokens = max_tokens self.temperature = temperature async def generate( - self, - prompt, - num_samples: int, + self, + prompt, + num_samples: int, ) -> List[Tuple[str, float]]: """ Overview: @@ -136,8 +144,4 @@ async def generate( response = await self.vllm_actor.generate(prompt, num_samples, self.max_tokens, self.temperature) # Use raw logprobs as confidence scores confidence_scores = [x.cumulative_logprob for x in response.outputs] - return [ - (x.text.strip(), conf) - for x, conf in zip(response.outputs, confidence_scores) - ] - + return [(x.text.strip(), conf) for x, conf in zip(response.outputs, confidence_scores)] From 0d311ec76763f18cc9231e82dc134f87fc578808 Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Wed, 22 Jan 2025 04:27:26 +0000 Subject: [PATCH 04/18] enum+typing lint --- .../collector/tests/test_vllm_collector.py | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/ding/worker/collector/tests/test_vllm_collector.py b/ding/worker/collector/tests/test_vllm_collector.py index 58a675532e..509c733ce1 100644 --- a/ding/worker/collector/tests/test_vllm_collector.py +++ b/ding/worker/collector/tests/test_vllm_collector.py @@ -1,4 +1,4 @@ -from typing import List, Tuple +from typing import List, Tuple, Optional import os import uuid from loguru import logger @@ -9,11 +9,16 @@ # note that HFModelGenerator has a parameter "mm_processor_kwargs" set to align with the settings of Qwen in default model = HuggingFaceModelGenerator('/mnt/afs/share/Qwen2-VL-7B', temperature=0.5) +from enum import Enum +class Modality(Enum): + IMAGE = "image" + TEXT = "text" + VIDEO = "video" -def get_prompts_qwen(questions: list, modality: str): - if modality == "image": +def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str],Optional[List[int]]]: + if modality == Modality.IMAGE: placeholder = "<|image_pad|>" - elif modality == "video": + elif modality == Modality.VIDEO: placeholder = "<|video_pad|>" else: msg = f"Modality {modality} is not supported." 
@@ -28,17 +33,17 @@ def get_prompts_qwen(questions: list, modality: str): ) for question in questions ] stop_token_ids = None - return prompts, stop_token_ids + return prompts,stop_token_ids -def get_multi_modal_input(modality: str, filenames: list, questions: list): +def get_multi_modal_input(modality: Modality, filenames: list, questions: list) -> dict: """ return { "data": image or video, "question": question, } """ - if modality == "image": + if modality == Modality.IMAGE: # Input image and question ret = {'data': [], 'question': []} for filename, question in zip(filenames, questions): @@ -65,16 +70,11 @@ def get_multi_modal_input(modality: str, filenames: list, questions: list): num_prompts = len(questions) image_repeat_prob = None -from enum import Enum -class Modality(Enum): - IMAGE = 'image' - TEXT = 'text' - VIDEO = 'video' -modality = Modality.IMAGE.value +modality = Modality.IMAGE mm_input = get_multi_modal_input(modality, img_names, questions) data = mm_input["data"] @@ -87,7 +87,7 @@ class Modality(Enum): async def main(): - inputs = [{"prompt": prompt, "multi_modal_data": {modality: data}} for prompt, data in zip(prompts, data)] + inputs = [{"prompt": prompt, "multi_modal_data": {modality.value: data}} for prompt, data in zip(prompts, data)] # generate responses for in_data in inputs: responses = await model.generate(prompt=in_data, num_samples=3) From e119c8592a766c1956c14d1eb1b22a48c8bd3848 Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Wed, 22 Jan 2025 12:23:27 +0000 Subject: [PATCH 05/18] add test_vllm_collector_multigpu.py --- .../tests/test_vllm_collector__multigpu.py | 277 ++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 ding/worker/collector/tests/test_vllm_collector__multigpu.py diff --git a/ding/worker/collector/tests/test_vllm_collector__multigpu.py b/ding/worker/collector/tests/test_vllm_collector__multigpu.py new file mode 100644 index 0000000000..fe0813a7a4 --- /dev/null +++ b/ding/worker/collector/tests/test_vllm_collector__multigpu.py @@ -0,0 +1,277 @@ +from typing import List, Tuple +import os +import uuid +from loguru import logger +from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput + + +class VllmActor: + def __init__(self, model_path: str,mm_processor_kwargs: dict,free_gpus:list) -> None: + """ + Overview: + Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. + Arguments: + - model_path (str): The path to the language model. + """ + self.free_gpus = free_gpus + self.num_gpus = len(self.free_gpus) + assert self.num_gpus > 0, "No GPUs found" + # Set CUDA_VISIBLE_DEVICES to use only free GPUs + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus)) + self.model_path = model_path + self.mm_processor_kwargs=mm_processor_kwargs + self._initialize() + + def _initialize(self) -> None: + """ + Overview: + Initialize the vLLM actor with a series of arguments. + """ + logger.info("Initializing vLLM") + # TODO: Try other options in https://docs.vllm.ai/en/stable/models/engine_args.html#engine-args. 
+ engine_args = AsyncEngineArgs( + model=self.model_path, + tensor_parallel_size=self.num_gpus, + max_num_batched_tokens=8192, + max_model_len=8192, + # enable_chunked_prefill=True, + max_num_seqs=5, + # Note - mm_processor_kwargs can also be passed to generate/chat calls + mm_processor_kwargs=self.mm_processor_kwargs, + ) + self.engine = AsyncLLMEngine.from_engine_args(engine_args) + + async def generate(self, prompt, num_samples: int, max_tokens: int, temperature: float = 0) -> RequestOutput: + """ + Overview: + Generate tactics for the current state. + Arguments: + - prompt : The prompt to generate tactics. + - num_samples (int): The number of tactics to generate. + - max_tokens (int): The maximum number of tokens to generate. + - temperature (float): The temperature for the language model, default to 0. + Returns: + - RequestOutput: The generated tactics and their log-probabilities. + """ + sampling_params = SamplingParams( + n=num_samples, + max_tokens=max_tokens, + temperature=temperature, + ) + + # Using async iterator to handle vLLM's generation process + # 1. vLLM's generate method is asynchronous to prevent blocking while waiting for model outputs + # 2. async for allows streaming the generated outputs incrementally instead of waiting for all results + # 3. This approach is particularly suitable for LLM inference which can be time-consuming + # 4. The request_id ensures unique identification for each generation request + async for oup in self.engine.generate( + prompt, sampling_params, request_id=str(uuid.uuid4().hex) + ): + final_output = oup + return final_output + + +class HuggingFaceModelGenerator: + """ + Overview: + A LLM/VLM generator that uses Hugging Face models with vLLM as the backend. + """ + + def __init__(self, model_path: str, free_gpus:list, max_tokens: int = 1024, temperature: float = 0, mm_processor_kwargs:dict = { + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + }) -> None: + """ + Overview: + Initialize the Hugging Face model generator. + Arguments: + - model_path (str): The path to the language model. + - max_tokens (int): The maximum number of tokens to generate, default to 1024. + - temperature (float): The temperature for the language model, default to 0. + """ + self.vllm_actor = VllmActor(model_path,mm_processor_kwargs,free_gpus) + self.max_tokens = max_tokens + self.temperature = temperature + + async def generate( + self, + prompt, + num_samples: int, + ) -> List[Tuple[str, float]]: + """ + Overview: + Generate tactics for the current state. + Arguments: + - prompt : The prompt to generate tactics. + - num_samples (int): The number of tactics to generate. + Returns: + - List[Tuple[str, float]]: The generated tactics and their log-probabilities. + + .. note:: + This method is asynchronous and returns a coroutine. + """ + response = await self.vllm_actor.generate(prompt, num_samples, self.max_tokens, self.temperature) + # Use raw logprobs as confidence scores + confidence_scores = [x.cumulative_logprob for x in response.outputs] + return [ + (x.text.strip(), conf) + for x, conf in zip(response.outputs, confidence_scores) + ] + + +def get_free_gpus() -> List[int]: + """ + Overview: + Get IDs of GPUs with free memory. + Returns: + - List[int]: The IDs of the free GPUs. 
+ """ + try: + # Get GPU memory usage using nvidia-smi + gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader')\ + .readlines() + free_gpus = [] + + for gpu_id, stats in enumerate(gpu_stats): + mem_used, mem_total = map(int, stats.strip().split(',')) + # Consider GPU as free if less than 5% memory is used + if mem_used / mem_total < 0.05: + free_gpus.append(gpu_id) + + return free_gpus if free_gpus else [0] # Default to GPU 0 if no free GPUs found + except Exception: + logger.warning("Failed to get GPU stats, defaulting to GPU 0") + return [0] + +def chunk_list(original_list, t): + # 使用列表推导式和切片 + new_list = [original_list[i:i + t] for i in range(0, len(original_list), t)] + return new_list + + +from typing import List, Tuple, Optional +import os +from loguru import logger +from vllm.assets.image import ImageAsset +from enum import Enum +import concurrent.futures +class Modality(Enum): + IMAGE = "image" + TEXT = "text" + VIDEO = "video" + +def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str],Optional[List[int]]]: + if modality == Modality.IMAGE: + placeholder = "<|image_pad|>" + elif modality == Modality.VIDEO: + placeholder = "<|video_pad|>" + else: + msg = f"Modality {modality} is not supported." + raise ValueError(msg) + + prompts = [ + ( + "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n" + ) for question in questions + ] + stop_token_ids = None + return prompts,stop_token_ids + + +def get_multi_modal_input(modality: Modality, filenames: list, questions: list) -> dict: + """ + return { + "data": image or video, + "question": question, + } + """ + if modality == Modality.IMAGE: + # Input image and question + ret = {'data': [], 'question': []} + for filename, question in zip(filenames, questions): + image = ImageAsset(filename) \ + .pil_image.convert("RGB") + #img_question = "What is the content of this image?" + img_question = question + ret["data"].append(image) + ret["question"].append(img_question) + else: + msg = f"Modality {modality} is not supported." 
+ raise ValueError(msg) + return ret + + +async def run_vllm_collector(gpu_id, prompts, model_path,temperature): + # 设置当前进程的可用GPU + os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) + + model = HuggingFaceModelGenerator(model_path,free_gpus=[gpu_id],temperature=temperature) # 实例化模型 + + responses_list = [] + for prompt in prompts: + responses = await model.generate(prompt, num_samples=3) + for response in responses: + responses_list.append(response) + print(f"[GPU {gpu_id}] Response: {response}") + + return responses_list + +import asyncio +import nest_asyncio +def start_collector(gpu_id, prompts, model_path,temperature): + # 在每个进程中运行事件循环 + results = asyncio.run(run_vllm_collector(gpu_id, prompts, model_path,temperature)) + return results + +def main(prompts, model_path, free_gpus,temperature): + num_tot=len(prompts) + num_gpu=len(free_gpus) + num_per_gpu=num_tot//num_gpu + prompts_per_gpu=chunk_list(prompts,num_per_gpu) + with concurrent.futures.ProcessPoolExecutor(max_workers=len(free_gpus)) as executor: + futures = [] + for gpu_id,prompts_gpu in zip(free_gpus,prompts_per_gpu): + futures.append(executor.submit(start_collector, gpu_id, prompts_gpu, model_path,temperature)) + + # 收集所有结果 + all_results = [] + for future in concurrent.futures.as_completed(futures): + all_results.extend(future.result()) + + # 保存结果的逻辑 + with open("/mnt/afs/wangqijian/tests/vllm_multi_gpu.txt", "w") as f: + for response in all_results: + f.write(f"{response}\n") + + + + +if __name__ == "__main__": + questions=['Please describe the image.','Please describe the image.', + 'What\'s the text in the image?','What\'s the text in the image?', + 'What is in the image?','What is in the image?','How many people are in the image?','How many people are in the image?', + 'What is the emotion of the main character of the image?','What is the emotion of the main character of the image?', + 'How many animals are in the image?','How many animals are in the image?', + 'What is the place of the image?','What is the place of the image?','What is the peroson doing?','What is the peroson doing?' 
+ ] + img_names=['/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2127)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5394)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1160)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4956)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2212)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3387)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4086)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4384)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5000)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1237)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(766)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(6031)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(6)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2284)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4533)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5495)' + ] + free_gpus=get_free_gpus() + modality = Modality.IMAGE + mm_input = get_multi_modal_input(modality, img_names, questions) + data = mm_input["data"] + question = mm_input["question"] + prompts, stop_token_ids = get_prompts_qwen(question, modality) + model_path='/mnt/afs/share/Qwen2-VL-7B' + temperature=0.5 + main(prompts,model_path,free_gpus,temperature) \ No newline at end of file From bf3415575eeaab6e4df651bd4f64c65a87f3f0c6 Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Wed, 22 Jan 2025 12:33:15 +0000 Subject: [PATCH 06/18] Add test_vllm_collector_multigpu --- .../tests/test_vllm_collector__multigpu.py | 103 ++++++++++-------- 1 file changed, 58 insertions(+), 45 deletions(-) diff --git a/ding/worker/collector/tests/test_vllm_collector__multigpu.py b/ding/worker/collector/tests/test_vllm_collector__multigpu.py index fe0813a7a4..e23058a27a 100644 --- a/ding/worker/collector/tests/test_vllm_collector__multigpu.py +++ b/ding/worker/collector/tests/test_vllm_collector__multigpu.py @@ -77,7 +77,8 @@ class HuggingFaceModelGenerator: A LLM/VLM generator that uses Hugging Face models with vLLM as the backend. """ - def __init__(self, model_path: str, free_gpus:list, max_tokens: int = 1024, temperature: float = 0, mm_processor_kwargs:dict = { + def __init__(self, model_path: str, free_gpus:list, + max_tokens: int = 1024, temperature: float = 0, mm_processor_kwargs:dict = { "min_pixels": 28 * 28, "max_pixels": 1280 * 28 * 28, }) -> None: @@ -120,31 +121,31 @@ async def generate( def get_free_gpus() -> List[int]: - """ - Overview: - Get IDs of GPUs with free memory. - Returns: - - List[int]: The IDs of the free GPUs. - """ - try: - # Get GPU memory usage using nvidia-smi - gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader')\ - .readlines() - free_gpus = [] + """ + Overview: + Get IDs of GPUs with free memory. + Returns: + - List[int]: The IDs of the free GPUs. 
+ """ + try: + # Get GPU memory usage using nvidia-smi + gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader')\ + .readlines() + free_gpus = [] - for gpu_id, stats in enumerate(gpu_stats): - mem_used, mem_total = map(int, stats.strip().split(',')) - # Consider GPU as free if less than 5% memory is used - if mem_used / mem_total < 0.05: - free_gpus.append(gpu_id) + for gpu_id, stats in enumerate(gpu_stats): + mem_used, mem_total = map(int, stats.strip().split(',')) + # Consider GPU as free if less than 5% memory is used + if mem_used / mem_total < 0.05: + free_gpus.append(gpu_id) - return free_gpus if free_gpus else [0] # Default to GPU 0 if no free GPUs found - except Exception: - logger.warning("Failed to get GPU stats, defaulting to GPU 0") - return [0] + return free_gpus if free_gpus else [0] # Default to GPU 0 if no free GPUs found + except Exception: + logger.warning("Failed to get GPU stats, defaulting to GPU 0") + return [0] -def chunk_list(original_list, t): - # 使用列表推导式和切片 +def chunk_list(original_list:list, t:int) -> List[list]: + # chunk the list into sub_lists new_list = [original_list[i:i + t] for i in range(0, len(original_list), t)] return new_list @@ -204,11 +205,11 @@ def get_multi_modal_input(modality: Modality, filenames: list, questions: list) return ret -async def run_vllm_collector(gpu_id, prompts, model_path,temperature): - # 设置当前进程的可用GPU +async def run_vllm_collector(gpu_id:int, prompts:List, model_path:str,temperature:float) ->List[str]: + # set visible gpu os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) - - model = HuggingFaceModelGenerator(model_path,free_gpus=[gpu_id],temperature=temperature) # 实例化模型 + # get a model on a single gpu + model = HuggingFaceModelGenerator(model_path,free_gpus=[gpu_id],temperature=temperature) responses_list = [] for prompt in prompts: @@ -220,13 +221,12 @@ async def run_vllm_collector(gpu_id, prompts, model_path,temperature): return responses_list import asyncio -import nest_asyncio -def start_collector(gpu_id, prompts, model_path,temperature): - # 在每个进程中运行事件循环 +def start_collector(gpu_id:int, prompts:list, model_path:str,temperature:float) ->List[str]: + # event loop in a process results = asyncio.run(run_vllm_collector(gpu_id, prompts, model_path,temperature)) return results -def main(prompts, model_path, free_gpus,temperature): +def main(prompts:list, model_path:str, free_gpus:List[int],temperature:float) -> None: num_tot=len(prompts) num_gpu=len(free_gpus) num_per_gpu=num_tot//num_gpu @@ -236,12 +236,12 @@ def main(prompts, model_path, free_gpus,temperature): for gpu_id,prompts_gpu in zip(free_gpus,prompts_per_gpu): futures.append(executor.submit(start_collector, gpu_id, prompts_gpu, model_path,temperature)) - # 收集所有结果 + # get all results all_results = [] for future in concurrent.futures.as_completed(futures): all_results.extend(future.result()) - # 保存结果的逻辑 + # save results with open("/mnt/afs/wangqijian/tests/vllm_multi_gpu.txt", "w") as f: for response in all_results: f.write(f"{response}\n") @@ -252,19 +252,32 @@ def main(prompts, model_path, free_gpus,temperature): if __name__ == "__main__": questions=['Please describe the image.','Please describe the image.', 'What\'s the text in the image?','What\'s the text in the image?', - 'What is in the image?','What is in the image?','How many people are in the image?','How many people are in the image?', - 'What is the emotion of the main character of the image?','What is the emotion of the main character of the image?', - 'How many 
animals are in the image?','How many animals are in the image?', - 'What is the place of the image?','What is the place of the image?','What is the peroson doing?','What is the peroson doing?' + 'What is in the image?','What is in the image?', + 'How many people are in the image?','How many people are in the image?', + 'What is the emotion of the main character of the image?', + 'What is the emotion of the main character of the image?', + 'How many animals are in the image?', + 'How many animals are in the image?', + 'What is the place of the image?','What is the place of the image?', + 'What is the peroson doing?','What is the peroson doing?' ] - img_names=['/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2127)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5394)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1160)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4956)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2212)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3387)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4086)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4384)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5000)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1237)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(766)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(6031)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(6)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2284)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4533)','/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5495)' + img_names=[ + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2127)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5394)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1160)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4956)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2212)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3387)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4086)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4384)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5000)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1237)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(766)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(6031)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(6)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2284)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4533)', + '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5495)' ] free_gpus=get_free_gpus() modality = Modality.IMAGE From 1794b6a6c90c64d622f6ff5632755d291440a293 Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Wed, 22 Jan 2025 12:48:48 +0000 Subject: [PATCH 07/18] formatted test_vllm_collector_multigpu --- ...gpu.py => test_vllm_collector_multigpu.py} | 106 ++++++++++-------- 1 file changed, 57 insertions(+), 49 deletions(-) rename ding/worker/collector/tests/{test_vllm_collector__multigpu.py => test_vllm_collector_multigpu.py} (80%) diff --git 
a/ding/worker/collector/tests/test_vllm_collector__multigpu.py b/ding/worker/collector/tests/test_vllm_collector_multigpu.py similarity index 80% rename from ding/worker/collector/tests/test_vllm_collector__multigpu.py rename to ding/worker/collector/tests/test_vllm_collector_multigpu.py index e23058a27a..bb6c977c6b 100644 --- a/ding/worker/collector/tests/test_vllm_collector__multigpu.py +++ b/ding/worker/collector/tests/test_vllm_collector_multigpu.py @@ -6,7 +6,8 @@ class VllmActor: - def __init__(self, model_path: str,mm_processor_kwargs: dict,free_gpus:list) -> None: + + def __init__(self, model_path: str, mm_processor_kwargs: dict, free_gpus: list) -> None: """ Overview: Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. @@ -19,7 +20,7 @@ def __init__(self, model_path: str,mm_processor_kwargs: dict,free_gpus:list) -> # Set CUDA_VISIBLE_DEVICES to use only free GPUs os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus)) self.model_path = model_path - self.mm_processor_kwargs=mm_processor_kwargs + self.mm_processor_kwargs = mm_processor_kwargs self._initialize() def _initialize(self) -> None: @@ -58,7 +59,7 @@ async def generate(self, prompt, num_samples: int, max_tokens: int, temperature: max_tokens=max_tokens, temperature=temperature, ) - + # Using async iterator to handle vLLM's generation process # 1. vLLM's generate method is asynchronous to prevent blocking while waiting for model outputs # 2. async for allows streaming the generated outputs incrementally instead of waiting for all results @@ -77,11 +78,17 @@ class HuggingFaceModelGenerator: A LLM/VLM generator that uses Hugging Face models with vLLM as the backend. """ - def __init__(self, model_path: str, free_gpus:list, - max_tokens: int = 1024, temperature: float = 0, mm_processor_kwargs:dict = { + def __init__( + self, + model_path: str, + free_gpus: list, + max_tokens: int = 1024, + temperature: float = 0, + mm_processor_kwargs: dict = { "min_pixels": 28 * 28, "max_pixels": 1280 * 28 * 28, - }) -> None: + } + ) -> None: """ Overview: Initialize the Hugging Face model generator. @@ -90,14 +97,14 @@ def __init__(self, model_path: str, free_gpus:list, - max_tokens (int): The maximum number of tokens to generate, default to 1024. - temperature (float): The temperature for the language model, default to 0. 
""" - self.vllm_actor = VllmActor(model_path,mm_processor_kwargs,free_gpus) + self.vllm_actor = VllmActor(model_path, mm_processor_kwargs, free_gpus) self.max_tokens = max_tokens self.temperature = temperature async def generate( - self, - prompt, - num_samples: int, + self, + prompt, + num_samples: int, ) -> List[Tuple[str, float]]: """ Overview: @@ -114,11 +121,8 @@ async def generate( response = await self.vllm_actor.generate(prompt, num_samples, self.max_tokens, self.temperature) # Use raw logprobs as confidence scores confidence_scores = [x.cumulative_logprob for x in response.outputs] - return [ - (x.text.strip(), conf) - for x, conf in zip(response.outputs, confidence_scores) - ] - + return [(x.text.strip(), conf) for x, conf in zip(response.outputs, confidence_scores)] + def get_free_gpus() -> List[int]: """ @@ -144,7 +148,8 @@ def get_free_gpus() -> List[int]: logger.warning("Failed to get GPU stats, defaulting to GPU 0") return [0] -def chunk_list(original_list:list, t:int) -> List[list]: + +def chunk_list(original_list: list, t: int) -> List[list]: # chunk the list into sub_lists new_list = [original_list[i:i + t] for i in range(0, len(original_list), t)] return new_list @@ -156,12 +161,15 @@ def chunk_list(original_list:list, t:int) -> List[list]: from vllm.assets.image import ImageAsset from enum import Enum import concurrent.futures + + class Modality(Enum): IMAGE = "image" TEXT = "text" VIDEO = "video" -def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str],Optional[List[int]]]: + +def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str], Optional[List[int]]]: if modality == Modality.IMAGE: placeholder = "<|image_pad|>" elif modality == Modality.VIDEO: @@ -179,7 +187,7 @@ def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str],Opt ) for question in questions ] stop_token_ids = None - return prompts,stop_token_ids + return prompts, stop_token_ids def get_multi_modal_input(modality: Modality, filenames: list, questions: list) -> dict: @@ -205,11 +213,11 @@ def get_multi_modal_input(modality: Modality, filenames: list, questions: list) return ret -async def run_vllm_collector(gpu_id:int, prompts:List, model_path:str,temperature:float) ->List[str]: +async def run_vllm_collector(gpu_id: int, prompts: List, model_path: str, temperature: float) -> List[str]: # set visible gpu os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) # get a model on a single gpu - model = HuggingFaceModelGenerator(model_path,free_gpus=[gpu_id],temperature=temperature) + model = HuggingFaceModelGenerator(model_path, free_gpus=[gpu_id], temperature=temperature) responses_list = [] for prompt in prompts: @@ -220,21 +228,25 @@ async def run_vllm_collector(gpu_id:int, prompts:List, model_path:str,temperatur return responses_list + import asyncio -def start_collector(gpu_id:int, prompts:list, model_path:str,temperature:float) ->List[str]: - # event loop in a process - results = asyncio.run(run_vllm_collector(gpu_id, prompts, model_path,temperature)) + + +def start_collector(gpu_id: int, prompts: list, model_path: str, temperature: float) -> List[str]: + # event loop in a process + results = asyncio.run(run_vllm_collector(gpu_id, prompts, model_path, temperature)) return results -def main(prompts:list, model_path:str, free_gpus:List[int],temperature:float) -> None: - num_tot=len(prompts) - num_gpu=len(free_gpus) - num_per_gpu=num_tot//num_gpu - prompts_per_gpu=chunk_list(prompts,num_per_gpu) + +def main(prompts: list, model_path: str, free_gpus: 
List[int], temperature: float) -> None: + num_tot = len(prompts) + num_gpu = len(free_gpus) + num_per_gpu = num_tot // num_gpu + prompts_per_gpu = chunk_list(prompts, num_per_gpu) with concurrent.futures.ProcessPoolExecutor(max_workers=len(free_gpus)) as executor: futures = [] - for gpu_id,prompts_gpu in zip(free_gpus,prompts_per_gpu): - futures.append(executor.submit(start_collector, gpu_id, prompts_gpu, model_path,temperature)) + for gpu_id, prompts_gpu in zip(free_gpus, prompts_per_gpu): + futures.append(executor.submit(start_collector, gpu_id, prompts_gpu, model_path, temperature)) # get all results all_results = [] @@ -245,23 +257,19 @@ def main(prompts:list, model_path:str, free_gpus:List[int],temperature:float) - with open("/mnt/afs/wangqijian/tests/vllm_multi_gpu.txt", "w") as f: for response in all_results: f.write(f"{response}\n") - - if __name__ == "__main__": - questions=['Please describe the image.','Please describe the image.', - 'What\'s the text in the image?','What\'s the text in the image?', - 'What is in the image?','What is in the image?', - 'How many people are in the image?','How many people are in the image?', - 'What is the emotion of the main character of the image?', - 'What is the emotion of the main character of the image?', - 'How many animals are in the image?', - 'How many animals are in the image?', - 'What is the place of the image?','What is the place of the image?', - 'What is the peroson doing?','What is the peroson doing?' - ] - img_names=[ + questions = [ + 'Please describe the image.', 'Please describe the image.', 'What\'s the text in the image?', + 'What\'s the text in the image?', 'What is in the image?', 'What is in the image?', + 'How many people are in the image?', 'How many people are in the image?', + 'What is the emotion of the main character of the image?', + 'What is the emotion of the main character of the image?', 'How many animals are in the image?', + 'How many animals are in the image?', 'What is the place of the image?', 'What is the place of the image?', + 'What is the peroson doing?', 'What is the peroson doing?' 
+ ] + img_names = [ '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2127)', '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5394)', '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1160)', @@ -278,13 +286,13 @@ def main(prompts:list, model_path:str, free_gpus:List[int],temperature:float) - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2284)', '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4533)', '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5495)' - ] - free_gpus=get_free_gpus() + ] + free_gpus = get_free_gpus() modality = Modality.IMAGE mm_input = get_multi_modal_input(modality, img_names, questions) data = mm_input["data"] question = mm_input["question"] prompts, stop_token_ids = get_prompts_qwen(question, modality) - model_path='/mnt/afs/share/Qwen2-VL-7B' - temperature=0.5 - main(prompts,model_path,free_gpus,temperature) \ No newline at end of file + model_path = '/mnt/afs/share/Qwen2-VL-7B' + temperature = 0.5 + main(prompts, model_path, free_gpus, temperature) From f70a942d37492d5c4e3cf65aae1fb22c1a4183b1 Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Sun, 26 Jan 2025 07:03:14 +0000 Subject: [PATCH 08/18] formatted --- ding/worker/collector/tests/test_vllm_collector.py | 10 +++++----- .../cartpole/config/cartpole_dqn_ddp_config.py | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/ding/worker/collector/tests/test_vllm_collector.py b/ding/worker/collector/tests/test_vllm_collector.py index 509c733ce1..ef9252405b 100644 --- a/ding/worker/collector/tests/test_vllm_collector.py +++ b/ding/worker/collector/tests/test_vllm_collector.py @@ -10,12 +10,15 @@ model = HuggingFaceModelGenerator('/mnt/afs/share/Qwen2-VL-7B', temperature=0.5) from enum import Enum + + class Modality(Enum): IMAGE = "image" TEXT = "text" VIDEO = "video" -def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str],Optional[List[int]]]: + +def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str], Optional[List[int]]]: if modality == Modality.IMAGE: placeholder = "<|image_pad|>" elif modality == Modality.VIDEO: @@ -33,7 +36,7 @@ def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str],Opt ) for question in questions ] stop_token_ids = None - return prompts,stop_token_ids + return prompts, stop_token_ids def get_multi_modal_input(modality: Modality, filenames: list, questions: list) -> dict: @@ -71,9 +74,6 @@ def get_multi_modal_input(modality: Modality, filenames: list, questions: list) num_prompts = len(questions) image_repeat_prob = None - - - modality = Modality.IMAGE mm_input = get_multi_modal_input(modality, img_names, questions) diff --git a/dizoo/classic_control/cartpole/config/cartpole_dqn_ddp_config.py b/dizoo/classic_control/cartpole/config/cartpole_dqn_ddp_config.py index 82d6c673ec..a80662941a 100644 --- a/dizoo/classic_control/cartpole/config/cartpole_dqn_ddp_config.py +++ b/dizoo/classic_control/cartpole/config/cartpole_dqn_ddp_config.py @@ -63,4 +63,3 @@ from ding.entry import serial_pipeline with DDPContext(): serial_pipeline((main_config, create_config), seed=0) - From 7dae85e8fa0c0bc6db368bac52e2808e2fd747ea Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Sun, 26 Jan 2025 07:16:30 +0000 Subject: [PATCH 09/18] style_fixed --- .../collector/tests/test_vllm_collector.py | 8 +++----- .../tests/test_vllm_collector_multigpu.py | 16 +++++----------- 2 files changed, 8 insertions(+), 16 
deletions(-) diff --git a/ding/worker/collector/tests/test_vllm_collector.py b/ding/worker/collector/tests/test_vllm_collector.py index ef9252405b..d8b7beaf93 100644 --- a/ding/worker/collector/tests/test_vllm_collector.py +++ b/ding/worker/collector/tests/test_vllm_collector.py @@ -4,13 +4,13 @@ from loguru import logger from ..vllm_collector import HuggingFaceModelGenerator from vllm.assets.image import ImageAsset - +from enum import Enum +import asyncio +import nest_asyncio # set a temperature > 0 to get multiple responses # note that HFModelGenerator has a parameter "mm_processor_kwargs" set to align with the settings of Qwen in default model = HuggingFaceModelGenerator('/mnt/afs/share/Qwen2-VL-7B', temperature=0.5) -from enum import Enum - class Modality(Enum): IMAGE = "image" @@ -81,8 +81,6 @@ def get_multi_modal_input(modality: Modality, filenames: list, questions: list) question = mm_input["question"] prompts, stop_token_ids = get_prompts_qwen(question, modality) -import asyncio -import nest_asyncio nest_asyncio.apply() diff --git a/ding/worker/collector/tests/test_vllm_collector_multigpu.py b/ding/worker/collector/tests/test_vllm_collector_multigpu.py index bb6c977c6b..fa8d7b0df6 100644 --- a/ding/worker/collector/tests/test_vllm_collector_multigpu.py +++ b/ding/worker/collector/tests/test_vllm_collector_multigpu.py @@ -3,6 +3,11 @@ import uuid from loguru import logger from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput +from typing import List, Tuple, Optional +from vllm.assets.image import ImageAsset +from enum import Enum +import concurrent.futures +import asyncio class VllmActor: @@ -155,14 +160,6 @@ def chunk_list(original_list: list, t: int) -> List[list]: return new_list -from typing import List, Tuple, Optional -import os -from loguru import logger -from vllm.assets.image import ImageAsset -from enum import Enum -import concurrent.futures - - class Modality(Enum): IMAGE = "image" TEXT = "text" @@ -229,9 +226,6 @@ async def run_vllm_collector(gpu_id: int, prompts: List, model_path: str, temper return responses_list -import asyncio - - def start_collector(gpu_id: int, prompts: list, model_path: str, temperature: float) -> List[str]: # event loop in a process results = asyncio.run(run_vllm_collector(gpu_id, prompts, model_path, temperature)) From c642e6e4750c8d23fc097d01d54b6487f1e31a6a Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Sun, 26 Jan 2025 07:26:11 +0000 Subject: [PATCH 10/18] formatted --- ding/worker/collector/tests/test_vllm_collector_multigpu.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/ding/worker/collector/tests/test_vllm_collector_multigpu.py b/ding/worker/collector/tests/test_vllm_collector_multigpu.py index fa8d7b0df6..fa0ecbf2fc 100644 --- a/ding/worker/collector/tests/test_vllm_collector_multigpu.py +++ b/ding/worker/collector/tests/test_vllm_collector_multigpu.py @@ -1,9 +1,8 @@ -from typing import List, Tuple +from typing import List, Tuple, Optional import os import uuid from loguru import logger from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput -from typing import List, Tuple, Optional from vllm.assets.image import ImageAsset from enum import Enum import concurrent.futures From c45e4297306f6195884302fd6d9ac96b26a15c9c Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Sun, 26 Jan 2025 08:12:37 +0000 Subject: [PATCH 11/18] formatted --- setup.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/setup.py b/setup.py index 
f3d60222f1..3bc8977b46 100644 --- a/setup.py +++ b/setup.py @@ -81,6 +81,8 @@ 'einops', 'transformers', 'datasets', + 'loguru', + 'vllm' ], extras_require={ 'test': [ From 3b7903a4de595ff39bf48210ebf76d93ba18a5e6 Mon Sep 17 00:00:00 2001 From: PaParaZz1 Date: Fri, 7 Feb 2025 13:10:20 +0800 Subject: [PATCH 12/18] feature(nyz): add vllm collector interface definition --- ding/worker/collector/vllm_collector.py | 163 +++++++++++++++++++++++- 1 file changed, 162 insertions(+), 1 deletion(-) diff --git a/ding/worker/collector/vllm_collector.py b/ding/worker/collector/vllm_collector.py index 3bd138587f..a208a12f1f 100644 --- a/ding/worker/collector/vllm_collector.py +++ b/ding/worker/collector/vllm_collector.py @@ -1,8 +1,16 @@ -from typing import List, Tuple +from typing import List, Tuple, Optional, Any import os import uuid +import asyncio +import numpy as np from loguru import logger +from easydict import EasyDict from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput +from transformers import AutoTokenizer + +from ding.utils.data.rlhf_online_dataset import OnlineRLDataset +from ding.utils import SERIAL_COLLECTOR_REGISTRY +from .base_serial_collector import ISerialCollector class VllmActor: @@ -145,3 +153,156 @@ async def generate( # Use raw logprobs as confidence scores confidence_scores = [x.cumulative_logprob for x in response.outputs] return [(x.text.strip(), conf) for x, conf in zip(response.outputs, confidence_scores)] + + +@SERIAL_COLLECTOR_REGISTRY.register('vllm') +class VllmCollector(ISerialCollector): + """ + Overview: + Collector implementation for vLLM-based language models (LLM/VLM). + This collector manages the interaction with vLLM models for text generation tasks. + """ + config = dict( + # (str) LLM/VLM model path + model_path='', + # (int) Maximum number of tokens to generate per request + max_tokens=1024, + # (float) Temperature for sampling, 0 means greedy decoding + temperature=0.0, + # (dict) Multimodal processor kwargs for vision-language models + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + }, + # Dataset related configs + # (str) Key to access the input data in the dataset + input_key='input', + # (bool) Whether to apply a chat template to the input + apply_chat_template=False, + # (str) Template for the input + input_template=None, + # (bool) Whether to shuffle the dataset + shuffle=True, + ) + + def __init__(self, cfg: EasyDict) -> None: + """ + Overview: + Initialize the VllmCollector with configuration. + Arguments: + - cfg (:obj:`EasyDict`): Configuration for the collector including model path, generation parameters, + and dataset configuration + """ + super().__init__() + self._cfg = cfg + self._envstep = 0 + + # Initialize the tokenizer and dataset + self._tokenizer = AutoTokenizer.from_pretrained(cfg.model_path) + self._dataset = OnlineRLDataset( + dataset=cfg.dataset, + tokenizer=self._tokenizer, + input_key=cfg.input_key, + apply_chat_template=cfg.apply_chat_template, + input_template=cfg.input_template, + ) + + self._model = VllmActor(model_path=cfg.model_path, mm_processor_kwargs=cfg.mm_processor_kwargs) + self.reset() + + def reset(self) -> None: + """ + Overview: + Reset the collector, including the dataset index. + """ + self._index = np.arange(len(self._dataset)) + if self._cfg.shuffle: + np.random.shuffle(self._index) + + def reset_policy(self, _model: Optional[str] = None) -> None: + """ + Overview: + Since LLM generation does not require a explicit policy and env, this function is empty. 
+ """ + pass + + def reset_env(self, _env: Optional[Any] = None) -> None: + """ + Overview: + Since LLM generation does not require a explicit policy and env, this function is empty. + """ + pass + + def collect( + self, + n_samples: int = 100, + num_samples_per_prompt: int = 1, + train_iter: int = 0, + ) -> List[Tuple[str, float]]: + """ + Overview: + Collect generated responses from the vLLM model. + Arguments: + - n_samples (:obj:`int`): Number of prompts to generate. + - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt. + - train_iter (:obj:`int`): Current training iteration, used for logging. + Returns: + - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs + """ + if self._model is None: + raise RuntimeError("Model not initialized. Call `reset` method first.") + + prompt = self._dataset[self._index[:n_samples]] + # recusively update the index + self._index = self._index[n_samples:] + self._index[:n_samples] + + self._envstep += n_samples + + # Get the current event loop or create a new one + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + # Run the async generate method in the event loop + return loop.run_until_complete( + self._model.generate( + prompt=prompt, + num_samples=num_samples_per_prompt, + max_tokens=self._cfg.max_tokens, + temperature=self._cfg.temperature + ) + ) + + @property + def envstep(self) -> int: + """ + Overview: + Get the current environment step count. + Returns: + - count (:obj:`int`): Current environment step count + """ + return self._envstep + + @envstep.setter + def envstep(self, value: int) -> None: + """ + Overview: + Set the current environment step count. + """ + self._envstep = value + + def close(self) -> None: + """ + Overview: + Close the collector. + """ + pass + + def __del__(self) -> None: + """ + Overview: + Destructor for the collector. 
+ """ + self.close() From 6ca21344cd05d388387c5a82ab48214e7075fd27 Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Mon, 10 Feb 2025 07:32:18 +0000 Subject: [PATCH 13/18] added test_vllm_collector_multi_new --- ding/utils/data/rlhf_online_dataset.py | 40 +- .../data/tests/test_rlhf_online_dataset.py | 31 +- .../tests/test_vllm_collector_multi_new.py | 488 ++++++++++++++++++ .../tests/test_vllm_collector_multigpu.py | 7 +- ding/worker/collector/vllm_collector.py | 131 ++++- 5 files changed, 670 insertions(+), 27 deletions(-) create mode 100644 ding/worker/collector/tests/test_vllm_collector_multi_new.py diff --git a/ding/utils/data/rlhf_online_dataset.py b/ding/utils/data/rlhf_online_dataset.py index d307f09a32..00a81cba39 100644 --- a/ding/utils/data/rlhf_online_dataset.py +++ b/ding/utils/data/rlhf_online_dataset.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Union, Callable, Iterable +from typing import Any, Dict, Union, Callable, Iterable,List from tqdm import tqdm from torch.utils.data import Dataset from torch.distributed import get_rank @@ -17,6 +17,7 @@ def __init__( dataset: Iterable[Dict], tokenizer: AutoTokenizer, input_key: str = "input", + extra_input_keys: List[str] = [], apply_chat_template: bool = False, input_template: str = None, ) -> None: @@ -33,18 +34,27 @@ def __init__( super().__init__() self.tokenizer = tokenizer self.input_template = input_template + self.extra_input_keys = extra_input_keys if apply_chat_template: apply_chat_template = self.tokenizer.apply_chat_template self.prompts = [] + for key in extra_input_keys: + setattr(self, key, []) try: rank = get_rank() except ValueError: # not initialized yet, which is the case in unit test rank = 0 for data in tqdm(dataset, desc="Preprocessing data", disable=not rank == 0): - prompt = self._preprocess_data(data, input_template, input_key, apply_chat_template) - self.prompts.append(prompt) + processed_data = self._preprocess_data(data, input_template, input_key,extra_input_keys, apply_chat_template) + self.prompts.append(processed_data['prompt']) + for key in extra_input_keys: + getattr(self, key).append(processed_data[key]) #maybe can be imporved later + # self.prompts=np.array(self.prompts) + # for key in extra_input_keys: + # setattr(self, key, np.array(getattr(self,key))) + def __len__(self) -> int: """ @@ -55,7 +65,7 @@ def __len__(self) -> int: """ return len(self.prompts) - def __getitem__(self, idx: int) -> str: + def __getitem__(self, idx: int) -> str: #can be improved later for list indexing instead of single indexing """ Overview: Get the item at the given index. @@ -64,13 +74,24 @@ def __getitem__(self, idx: int) -> str: Returns: - item (str): The item at the given index. """ - return self.prompts[idx] + # extra inputs: usually image, video, audio, etc. + if self.extra_input_keys: + extra_inputs = {key: getattr(self, key)[idx] for key in self.extra_input_keys} + else: + extra_inputs = {} + return { + "prompt": self.prompts[idx], + "multi_modal_data":{ + **extra_inputs + } + } def _preprocess_data( self, data: Dict[str, Any], input_template: str = None, input_key: str = "input", + extra_input_keys: List[str] = [], apply_chat_template: Union[bool, Callable] = False, ) -> str: """ @@ -86,6 +107,10 @@ def _preprocess_data( Returns: - prompt (str): The formatted prompt. 
""" + if extra_input_keys: + extra_inputs = {key: data[key] for key in extra_input_keys} + else: + extra_inputs = {} if apply_chat_template: chat = data[input_key] if isinstance(chat, str): @@ -96,4 +121,7 @@ def _preprocess_data( prompt = data[input_key] if input_template: prompt = input_template.format(prompt) - return prompt + return { + "prompt": prompt, + **extra_inputs + } diff --git a/ding/utils/data/tests/test_rlhf_online_dataset.py b/ding/utils/data/tests/test_rlhf_online_dataset.py index cba9e7947c..88c2c70afe 100644 --- a/ding/utils/data/tests/test_rlhf_online_dataset.py +++ b/ding/utils/data/tests/test_rlhf_online_dataset.py @@ -1,27 +1,38 @@ import pytest from datasets import load_dataset -from transformers import AutoTokenizer from ding.utils.data import OnlineRLDataset - - +from transformers import AutoTokenizer +IMG_CONTEXT_TOKEN = '' +IMG_START_TOKEN = '' +IMG_END_TOKEN = '' +IMG_CONTEXT_NUM = 10 # user-defined number of image patches in the context @pytest.fixture def dataset(): # Load the dataset - hf_dataset = load_dataset("cat-searcher/minif2f-lean4")['validation'] + hf_dataset = load_dataset("MMInstruction/VL-RewardBench",split='test') + hf_dataset0 = hf_dataset.map( + lambda x: { + "query": f"{IMG_START_TOKEN}{IMG_CONTEXT_TOKEN * IMG_CONTEXT_NUM}{IMG_END_TOKEN}\n{x['query']}", + "image": x["image"], + } + ) + # shuffle the dataset + hf_dataset = hf_dataset0.shuffle(seed=42) print(hf_dataset) return hf_dataset @pytest.fixture def tokenizer(): - return AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B") + return AutoTokenizer.from_pretrained("OpenGVLab/InternVL2_5-4B") @pytest.mark.unittest def test_onlinerl_dataset_initialization(dataset, tokenizer): # Initialize OnlineRLDataset online_rl_dataset = OnlineRLDataset( - dataset=dataset, tokenizer=tokenizer, input_key="formal_statement", apply_chat_template=True + dataset=dataset, tokenizer=tokenizer, input_key="query", + extra_input_keys=["image"], apply_chat_template=True ) # Check if the dataset is initialized correctly assert len(online_rl_dataset) == len(dataset) @@ -31,9 +42,13 @@ def test_onlinerl_dataset_initialization(dataset, tokenizer): def test_onlinerl_dataset_getitem(dataset, tokenizer): # Initialize OnlineRLDataset online_rl_dataset = OnlineRLDataset( - dataset=dataset, tokenizer=tokenizer, input_key="formal_statement", apply_chat_template=True + dataset=dataset, tokenizer=tokenizer, input_key="query", + extra_input_keys=["image"], apply_chat_template=True ) # Check if __getitem__ returns the expected formatted prompt item = online_rl_dataset[0] print(item) - assert isinstance(item, str) + assert "prompt" in item + assert "multi_modal_data" in item + assert "image" in item['multi_modal_data'] + assert isinstance(item['prompt'],str) diff --git a/ding/worker/collector/tests/test_vllm_collector_multi_new.py b/ding/worker/collector/tests/test_vllm_collector_multi_new.py new file mode 100644 index 0000000000..114233f54e --- /dev/null +++ b/ding/worker/collector/tests/test_vllm_collector_multi_new.py @@ -0,0 +1,488 @@ +from typing import Any, Dict, Union, Callable, Iterable,List +from tqdm import tqdm +from torch.utils.data import Dataset +from torch.distributed import get_rank +from transformers import AutoTokenizer +from typing import List, Tuple, Optional, Any +import os +import uuid +import asyncio +import numpy as np +from loguru import logger +from easydict import EasyDict +from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput +from ding.utils import SERIAL_COLLECTOR_REGISTRY 
+from ding.worker.collector.base_serial_collector import ISerialCollector +from datasets import load_dataset +from ding.utils.data import OnlineRLDataset +import copy +import concurrent.futures + + +class VllmActor: + def __init__(self, model_path: str,mm_processor_kwargs: dict,free_gpus:list) -> None: + """ + Overview: + Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. + Arguments: + - model_path (str): The path to the language model. + """ + self.free_gpus = free_gpus + self.num_gpus = len(self.free_gpus) + assert self.num_gpus > 0, "No GPUs found" + # Set CUDA_VISIBLE_DEVICES to use only free GPUs + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus)) + self.model_path = model_path + self.mm_processor_kwargs=mm_processor_kwargs + self._initialize() + + def _initialize(self) -> None: + """ + Overview: + Initialize the vLLM actor with a series of arguments. + """ + logger.info("Initializing vLLM") + # TODO: Try other options in https://docs.vllm.ai/en/stable/models/engine_args.html#engine-args. + engine_args = AsyncEngineArgs( + model=self.model_path, + tensor_parallel_size=self.num_gpus, + max_num_batched_tokens=8192, + max_model_len=8192, + # enable_chunked_prefill=True, + max_num_seqs=5, + # Note - mm_processor_kwargs can also be passed to generate/chat calls + mm_processor_kwargs=self.mm_processor_kwargs, + ) + self.engine = AsyncLLMEngine.from_engine_args(engine_args) + + async def generate(self, prompt, num_samples: int, max_tokens: int, temperature: float = 0) -> RequestOutput: + """ + Overview: + Generate tactics for the current state. + Arguments: + - prompt : The prompt to generate tactics. + - num_samples (int): The number of tactics to generate. + - max_tokens (int): The maximum number of tokens to generate. + - temperature (float): The temperature for the language model, default to 0. + Returns: + - RequestOutput: The generated tactics and their log-probabilities. + """ + sampling_params = SamplingParams( + n=num_samples, + max_tokens=max_tokens, + temperature=temperature, + ) + + # Using async iterator to handle vLLM's generation process + # 1. vLLM's generate method is asynchronous to prevent blocking while waiting for model outputs + # 2. async for allows streaming the generated outputs incrementally instead of waiting for all results + # 3. This approach is particularly suitable for LLM inference which can be time-consuming + # 4. The request_id ensures unique identification for each generation request + async for oup in self.engine.generate( + prompt, sampling_params, request_id=str(uuid.uuid4().hex) + ): + final_output = oup + return final_output + + +@SERIAL_COLLECTOR_REGISTRY.register('vllm') +class VllmCollector(ISerialCollector): + """ + Overview: + Collector implementation for vLLM-based language models (LLM/VLM). + This collector manages the interaction with vLLM models for text generation tasks. 
+ """ + config = dict( + # (str) LLM/VLM model path + model_path='', + # (int) Maximum number of tokens to generate per request + max_tokens=1024, + # (float) Temperature for sampling, 0 means greedy decoding + temperature=0.0, + # (dict) Multimodal processor kwargs for vision-language models + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + }, + # Dataset related configs + # (str) Key to access the input data in the dataset + input_key='input', + # (bool) Whether to apply a chat template to the input + apply_chat_template=False, + # (str) Template for the input + input_template=None, + # (bool) Whether to shuffle the dataset + shuffle=True, + ) + + def __init__(self, cfg: EasyDict) -> None: + """ + Overview: + Initialize the VllmCollector with configuration. + Arguments: + - cfg (:obj:`EasyDict`): Configuration for the collector including model path, generation parameters, + and dataset configuration + """ + super().__init__() + self._cfg = cfg + self._envstep = 0 + + # Initialize the tokenizer and dataset + self._tokenizer = AutoTokenizer.from_pretrained(cfg.model_path) + self._dataset = OnlineRLDataset( + dataset=cfg.dataset, + tokenizer=self._tokenizer, + input_key=cfg.input_key, + apply_chat_template=cfg.apply_chat_template, + input_template=cfg.input_template, + extra_input_keys=cfg.extra_input_keys + ) + + self._model = VllmActor(model_path=cfg.model_path, mm_processor_kwargs=cfg.mm_processor_kwargs,free_gpus=cfg.free_gpus) + self.reset() + + def reset(self) -> None: + """ + Overview: + Reset the collector, including the dataset index. + """ + self._index = np.arange(len(self._dataset)) + if self._cfg.shuffle: + np.random.shuffle(self._index) + + def reset_policy(self, _model: Optional[str] = None) -> None: + """ + Overview: + Since LLM generation does not require a explicit policy and env, this function is empty. + """ + pass + + def reset_env(self, _env: Optional[Any] = None) -> None: + """ + Overview: + Since LLM generation does not require a explicit policy and env, this function is empty. + """ + pass + async def _generate_for_prompt(self, prompt: str, num_samples_per_prompt: int) -> List[Tuple[str, float]]: + return await self._model.generate( + prompt=prompt, + num_samples=num_samples_per_prompt, + max_tokens=self._cfg.max_tokens, + temperature=self._cfg.temperature + ) + def collect( + self, + n_samples: int = 100, + num_samples_per_prompt: int = 1, + train_iter: int = 0, + ) -> List[Tuple[str, float]]: + """ + Overview: + Collect generated responses from the vLLM model. + Arguments: + - n_samples (:obj:`int`): Number of prompts to generate. + - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt. + - train_iter (:obj:`int`): Current training iteration, used for logging. + Returns: + - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs + """ + if self._model is None: + raise RuntimeError("Model not initialized. 
Call `reset` method first.") + + prompts=[] + for id in self._index[:n_samples]: + prompts.append(self._dataset[id]) + # recusively update the index + self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + + self._envstep += n_samples + + # Get the current event loop or create a new one + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + # Run the async generate method in the event loop + # Create a list of tasks for each prompt + tasks = [self._generate_for_prompt(prompt, num_samples_per_prompt) for prompt in prompts] + + # Run all tasks concurrently and collect results + results = loop.run_until_complete(asyncio.gather(*tasks)) + + # Map prompts to their corresponding results + responses = {prompt["prompt"]: result for prompt, result in zip(prompts, results)} + + return responses + + def sync_collect( + self, + n_samples: int = 100, + num_samples_per_prompt: int = 1, + train_iter: int = 0, + ) -> List[Tuple[str, float]]: + """ + Overview: + Collect generated responses from the vLLM model. + Arguments: + - n_samples (:obj:`int`): Number of prompts to generate. + - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt. + - train_iter (:obj:`int`): Current training iteration, used for logging. + Returns: + - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs + """ + if self._model is None: + raise RuntimeError("Model not initialized. Call `reset` method first.") + + prompts=[] + for id in self._index[:n_samples]: + prompts.append(self._dataset[id]) + # recusively update the index + self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + + self._envstep += n_samples + + # Get the current event loop or create a new one + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + # Run the async generate method in the event loop + results = {} + for prompt in prompts: + # Run the async generate method in the event loop for each prompt + result = loop.run_until_complete( + self._model.generate( + prompt=prompt, + num_samples=num_samples_per_prompt, + max_tokens=self._cfg.max_tokens, + temperature=self._cfg.temperature + ) + ) + results[prompt['prompt']] = result + + return results + + def collect_prompts( + self, + n_samples: int = 100, + num_samples_per_prompt: int = 1, + train_iter: int = 0, + ) -> List[Tuple[str, float]]: + """ + Overview: + Collect generated responses from the vLLM model. + Arguments: + - n_samples (:obj:`int`): Number of prompts to generate. + - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt. + - train_iter (:obj:`int`): Current training iteration, used for logging. + Returns: + - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs + """ + if self._model is None: + raise RuntimeError("Model not initialized. 
Call `reset` method first.") + + prompts=[] + for id in self._index[:n_samples]: + prompts.append(self._dataset[id]) + # recusively update the index + self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + + self._envstep += n_samples + + # Get the current event loop or create a new one + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + # Run the async generate method in the event loop + results = {} + tasks=[] + for prompt in prompts: + for _ in range(num_samples_per_prompt): + # Run the async generate method in the event loop for each prompt + tasks.append(self._generate_for_prompt(prompt, num_samples_per_prompt=1)) + results_list = loop.run_until_complete(asyncio.gather(*tasks)) + for i,prompt in enumerate(prompts): + results[prompt['prompt']]=[] + for result in results_list[i*num_samples_per_prompt:(i+1)*num_samples_per_prompt]: + results[prompt['prompt']].append(result.outputs[0].text) + return results + + + + @property + def envstep(self) -> int: + """ + Overview: + Get the current environment step count. + Returns: + - count (:obj:`int`): Current environment step count + """ + return self._envstep + + @envstep.setter + def envstep(self, value: int) -> None: + """ + Overview: + Set the current environment step count. + """ + self._envstep = value + + def close(self) -> None: + """ + Overview: + Close the collector. + """ + pass + + def __del__(self) -> None: + """ + Overview: + Destructor for the collector. + """ + self.close() + + + + +def get_free_gpus() -> List[int]: + """ + Overview: + Get IDs of GPUs with free memory. + Returns: + - List[int]: The IDs of the free GPUs. + """ + try: + # Get GPU memory usage using nvidia-smi + gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader')\ + .readlines() + free_gpus = [] + + for gpu_id, stats in enumerate(gpu_stats): + mem_used, mem_total = map(int, stats.strip().split(',')) + # Consider GPU as free if less than 5% memory is used + if mem_used / mem_total < 0.05: + free_gpus.append(gpu_id) + + return free_gpus if free_gpus else [0] # Default to GPU 0 if no free GPUs found + except Exception: + logger.warning("Failed to get GPU stats, defaulting to GPU 0") + return [0] + +def chunk_list(original_list, t): + # chunk a list into sub_lists + new_list = [original_list[i:i + t] for i in range(0, len(original_list), t)] + return new_list + + +# prepare dataset +IMG_START_TOKEN = '<|vision_start|>' +IMG_END_TOKEN = '<|vision_end|>' +PLACE_HOLDER='<|image_pad|>' +def dataset(num=None): + # Load the dataset + hf_dataset = load_dataset("/mnt/afs/wangqijian/data/rlhf_dataset_test/VL-RewardBench",split='test') + hf_dataset0 = hf_dataset.map( + lambda x: { + "query": f"{IMG_START_TOKEN}{PLACE_HOLDER}{IMG_END_TOKEN}{x['query']}", + "image": x["image"], + } + ) + # shuffle the dataset + hf_dataset = hf_dataset0.shuffle(seed=42) + if num is None: + return hf_dataset + else: + ret_data=[] + for i in range(0,num): + ret_data.append(hf_dataset[i]) + return ret_data + + +def run_vllm_collector(config): + # set GPU for current process + gpu_ids = ",".join(map(str, config.free_gpus)) + os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids + collector = VllmCollector(config) # 实例化模型 + #ret=collector.collect(n_samples=2,num_samples_per_prompt=4) + ret=collector.collect(n_samples=2,num_samples_per_prompt=4) + return ret + + +def start_collector(config): + # collect within the process + # results:a dict, basic form: + 
#{"prompt_0":[ans_0,ans_1,...,ans_n],"prompt_1":[ans_0,ans_1,...,ans_n],...} + results = run_vllm_collector(config) + return results + +def main(tot_dataset, free_gpus,config): + num_tot=len(tot_dataset) + num_gpu=len(free_gpus) + num_per_gpu=num_tot//num_gpu + prompts_per_gpu=chunk_list(tot_dataset,num_per_gpu) + with concurrent.futures.ProcessPoolExecutor(max_workers=len(free_gpus)) as executor: + futures = [] + for gpu_id,prompts_gpu in zip(free_gpus,prompts_per_gpu): + config_per_gpu=copy.deepcopy(config) + config_per_gpu.dataset=prompts_gpu + config_per_gpu.free_gpus=[gpu_id] + futures.append(executor.submit(start_collector, config_per_gpu)) + + # collect all results + all_results = [] + for future in concurrent.futures.as_completed(futures): + all_results.append(future.result()) + + # save results + with open(config.save_path, "w") as f: + for response in all_results: + print(response) + for prompt in list(response.keys()): + f.write(f"{prompt}:\n") + for i,output in enumerate(response[prompt].outputs): + f.write(f'output_{i}:\n') + f.write(f"{output.text}\n") + + +test_dataset=dataset(num=96) +free_gpus=get_free_gpus() +config = EasyDict( + # (str) LLM/VLM model path + model_path='/mnt/afs/share/Qwen2-VL-7B', + # (int) Maximum number of tokens to generate per request + max_tokens=4096, + # (float) Temperature for sampling, 0 means greedy decoding + temperature=1.0, + # (dict) Multimodal processor kwargs for vision-language models + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + },# defaul set to align with Qwen2-VL-7B + # Dataset related configs + # dataset=test_dataset, + # dataset is defined for each gpu respectively + # (str) Key to access the input data in the dataset + input_key='query', + # (bool) Whether to apply a chat template to the input + apply_chat_template=True, + # (str) Template for the input + input_template=None, + # (bool) Whether to shuffle the dataset + shuffle=True, + extra_input_keys=['image'], + # free_gpus is defined for each gpu respectively + # save_path is the file to store the output + save_path="your_save_path" + ) + + + + +main(test_dataset,free_gpus,config) \ No newline at end of file diff --git a/ding/worker/collector/tests/test_vllm_collector_multigpu.py b/ding/worker/collector/tests/test_vllm_collector_multigpu.py index fa0ecbf2fc..1c7d79e1bb 100644 --- a/ding/worker/collector/tests/test_vllm_collector_multigpu.py +++ b/ding/worker/collector/tests/test_vllm_collector_multigpu.py @@ -244,12 +244,13 @@ def main(prompts: list, model_path: str, free_gpus: List[int], temperature: floa # get all results all_results = [] for future in concurrent.futures.as_completed(futures): - all_results.extend(future.result()) + all_results.append(future.result()) # save results with open("/mnt/afs/wangqijian/tests/vllm_multi_gpu.txt", "w") as f: - for response in all_results: - f.write(f"{response}\n") + for responses in all_results: + for response in responses: + f.write(f"{response}\n") if __name__ == "__main__": diff --git a/ding/worker/collector/vllm_collector.py b/ding/worker/collector/vllm_collector.py index a208a12f1f..d1c086410c 100644 --- a/ding/worker/collector/vllm_collector.py +++ b/ding/worker/collector/vllm_collector.py @@ -205,6 +205,7 @@ def __init__(self, cfg: EasyDict) -> None: input_key=cfg.input_key, apply_chat_template=cfg.apply_chat_template, input_template=cfg.input_template, + extra_input_keys=cfg.extra_input_keys ) self._model = VllmActor(model_path=cfg.model_path, mm_processor_kwargs=cfg.mm_processor_kwargs) 
@@ -232,7 +233,13 @@ def reset_env(self, _env: Optional[Any] = None) -> None: Since LLM generation does not require a explicit policy and env, this function is empty. """ pass - + async def _generate_for_prompt(self, prompt: str, num_samples_per_prompt: int) -> List[Tuple[str, float]]: + return await self._model.generate( + prompt=prompt, + num_samples=num_samples_per_prompt, + max_tokens=self._cfg.max_tokens, + temperature=self._cfg.temperature + ) def collect( self, n_samples: int = 100, @@ -252,9 +259,57 @@ def collect( if self._model is None: raise RuntimeError("Model not initialized. Call `reset` method first.") - prompt = self._dataset[self._index[:n_samples]] + prompts=[] + for id in self._index[:n_samples]: + prompts.append(self._dataset[id]) + # recusively update the index + self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + + self._envstep += n_samples + + # Get the current event loop or create a new one + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + # Run the async generate method in the event loop + # Create a list of tasks for each prompt + tasks = [self._generate_for_prompt(prompt, num_samples_per_prompt) for prompt in prompts] + + # Run all tasks concurrently and collect results + results = loop.run_until_complete(asyncio.gather(*tasks)) + + # Map prompts to their corresponding results + responses = {prompt["prompt"]: result for prompt, result in zip(prompts, results)} + + return responses + + def sync_collect( + self, + n_samples: int = 100, + num_samples_per_prompt: int = 1, + train_iter: int = 0, + ) -> List[Tuple[str, float]]: + """ + Overview: + Collect generated responses from the vLLM model. + Arguments: + - n_samples (:obj:`int`): Number of prompts to generate. + - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt. + - train_iter (:obj:`int`): Current training iteration, used for logging. + Returns: + - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs + """ + if self._model is None: + raise RuntimeError("Model not initialized. Call `reset` method first.") + + prompts=[] + for id in self._index[:n_samples]: + prompts.append(self._dataset[id]) # recusively update the index - self._index = self._index[n_samples:] + self._index[:n_samples] + self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) self._envstep += n_samples @@ -266,14 +321,70 @@ def collect( asyncio.set_event_loop(loop) # Run the async generate method in the event loop - return loop.run_until_complete( - self._model.generate( - prompt=prompt, - num_samples=num_samples_per_prompt, - max_tokens=self._cfg.max_tokens, - temperature=self._cfg.temperature + results = {} + for prompt in prompts: + # Run the async generate method in the event loop for each prompt + result = loop.run_until_complete( + self._model.generate( + prompt=prompt, + num_samples=num_samples_per_prompt, + max_tokens=self._cfg.max_tokens, + temperature=self._cfg.temperature + ) ) - ) + results[prompt['prompt']] = result + + return results + + def collect_prompts( + self, + n_samples: int = 100, + num_samples_per_prompt: int = 1, + train_iter: int = 0, + ) -> List[Tuple[str, float]]: + """ + Overview: + Collect generated responses from the vLLM model. + Arguments: + - n_samples (:obj:`int`): Number of prompts to generate. + - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt. 
+ - train_iter (:obj:`int`): Current training iteration, used for logging. + Returns: + - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs + """ + if self._model is None: + raise RuntimeError("Model not initialized. Call `reset` method first.") + + prompts=[] + for id in self._index[:n_samples]: + prompts.append(self._dataset[id]) + # recusively update the index + self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + + self._envstep += n_samples + + # Get the current event loop or create a new one + try: + loop = asyncio.get_event_loop() + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + + # Run the async generate method in the event loop + results = {} + tasks=[] + for prompt in prompts: + for _ in range(num_samples_per_prompt): + # Run the async generate method in the event loop for each prompt + tasks.append(self._generate_for_prompt(prompt, num_samples_per_prompt=1)) + results_list = loop.run_until_complete(asyncio.gather(*tasks)) + for i,prompt in enumerate(prompts): + results[prompt['prompt']]=[] + for result in results_list[i*4:(i+1)*4]: + results[prompt['prompt']].append(result.outputs[0].text) + return results + + @property def envstep(self) -> int: From 606fd55fd8e626907b606ec7b1c3a083968debce Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Mon, 10 Feb 2025 07:34:55 +0000 Subject: [PATCH 14/18] added test_vllm_collector_multi_new --- .../worker/collector/tests/test_vllm_collector_multi_new.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/ding/worker/collector/tests/test_vllm_collector_multi_new.py b/ding/worker/collector/tests/test_vllm_collector_multi_new.py index 114233f54e..e61ac8fe2e 100644 --- a/ding/worker/collector/tests/test_vllm_collector_multi_new.py +++ b/ding/worker/collector/tests/test_vllm_collector_multi_new.py @@ -1,7 +1,3 @@ -from typing import Any, Dict, Union, Callable, Iterable,List -from tqdm import tqdm -from torch.utils.data import Dataset -from torch.distributed import get_rank from transformers import AutoTokenizer from typing import List, Tuple, Optional, Any import os @@ -451,7 +447,7 @@ def main(tot_dataset, free_gpus,config): f.write(f"{output.text}\n") -test_dataset=dataset(num=96) +test_dataset=dataset(num=16) free_gpus=get_free_gpus() config = EasyDict( # (str) LLM/VLM model path From 45487495a3dda5f0be4646324917a24a6f2ee5e2 Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Mon, 10 Feb 2025 08:05:57 +0000 Subject: [PATCH 15/18] formatted --- .flake8 | 2 +- ding/utils/data/rlhf_online_dataset.py | 25 ++- .../data/tests/test_rlhf_online_dataset.py | 12 +- .../tests/test_vllm_collector_multi_new.py | 158 +++++++++--------- ding/worker/collector/vllm_collector.py | 38 ++--- 5 files changed, 116 insertions(+), 119 deletions(-) diff --git a/.flake8 b/.flake8 index 9d86ca5e8c..8b176d3853 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] ignore=F401,F841,F403,E226,E126,W504,E265,E722,W503,W605,E741,E122,E731 max-line-length=120 -statistics + diff --git a/ding/utils/data/rlhf_online_dataset.py b/ding/utils/data/rlhf_online_dataset.py index 00a81cba39..08f6838d61 100644 --- a/ding/utils/data/rlhf_online_dataset.py +++ b/ding/utils/data/rlhf_online_dataset.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, Union, Callable, Iterable,List +from typing import Any, Dict, Union, Callable, Iterable, List from tqdm import tqdm from torch.utils.data import Dataset from torch.distributed import 
get_rank @@ -47,14 +47,16 @@ def __init__( except ValueError: # not initialized yet, which is the case in unit test rank = 0 for data in tqdm(dataset, desc="Preprocessing data", disable=not rank == 0): - processed_data = self._preprocess_data(data, input_template, input_key,extra_input_keys, apply_chat_template) + processed_data = self._preprocess_data( + data, input_template, input_key, extra_input_keys, apply_chat_template + ) self.prompts.append(processed_data['prompt']) + #maybe can be imporved later for key in extra_input_keys: - getattr(self, key).append(processed_data[key]) #maybe can be imporved later + getattr(self, key).append(processed_data[key]) # self.prompts=np.array(self.prompts) # for key in extra_input_keys: # setattr(self, key, np.array(getattr(self,key))) - def __len__(self) -> int: """ @@ -65,7 +67,8 @@ def __len__(self) -> int: """ return len(self.prompts) - def __getitem__(self, idx: int) -> str: #can be improved later for list indexing instead of single indexing + def __getitem__(self, idx: int) -> str: + #can be improved later for list indexing instead of single indexing """ Overview: Get the item at the given index. @@ -79,12 +82,7 @@ def __getitem__(self, idx: int) -> str: #can be improved later for list indexing extra_inputs = {key: getattr(self, key)[idx] for key in self.extra_input_keys} else: extra_inputs = {} - return { - "prompt": self.prompts[idx], - "multi_modal_data":{ - **extra_inputs - } - } + return {"prompt": self.prompts[idx], "multi_modal_data": {**extra_inputs}} def _preprocess_data( self, @@ -121,7 +119,4 @@ def _preprocess_data( prompt = data[input_key] if input_template: prompt = input_template.format(prompt) - return { - "prompt": prompt, - **extra_inputs - } + return {"prompt": prompt, **extra_inputs} diff --git a/ding/utils/data/tests/test_rlhf_online_dataset.py b/ding/utils/data/tests/test_rlhf_online_dataset.py index 88c2c70afe..1e12a777dd 100644 --- a/ding/utils/data/tests/test_rlhf_online_dataset.py +++ b/ding/utils/data/tests/test_rlhf_online_dataset.py @@ -6,10 +6,12 @@ IMG_START_TOKEN = '' IMG_END_TOKEN = '' IMG_CONTEXT_NUM = 10 # user-defined number of image patches in the context + + @pytest.fixture def dataset(): # Load the dataset - hf_dataset = load_dataset("MMInstruction/VL-RewardBench",split='test') + hf_dataset = load_dataset("MMInstruction/VL-RewardBench", split='test') hf_dataset0 = hf_dataset.map( lambda x: { "query": f"{IMG_START_TOKEN}{IMG_CONTEXT_TOKEN * IMG_CONTEXT_NUM}{IMG_END_TOKEN}\n{x['query']}", @@ -31,8 +33,7 @@ def tokenizer(): def test_onlinerl_dataset_initialization(dataset, tokenizer): # Initialize OnlineRLDataset online_rl_dataset = OnlineRLDataset( - dataset=dataset, tokenizer=tokenizer, input_key="query", - extra_input_keys=["image"], apply_chat_template=True + dataset=dataset, tokenizer=tokenizer, input_key="query", extra_input_keys=["image"], apply_chat_template=True ) # Check if the dataset is initialized correctly assert len(online_rl_dataset) == len(dataset) @@ -42,8 +43,7 @@ def test_onlinerl_dataset_initialization(dataset, tokenizer): def test_onlinerl_dataset_getitem(dataset, tokenizer): # Initialize OnlineRLDataset online_rl_dataset = OnlineRLDataset( - dataset=dataset, tokenizer=tokenizer, input_key="query", - extra_input_keys=["image"], apply_chat_template=True + dataset=dataset, tokenizer=tokenizer, input_key="query", extra_input_keys=["image"], apply_chat_template=True ) # Check if __getitem__ returns the expected formatted prompt item = online_rl_dataset[0] @@ -51,4 +51,4 @@ def 
test_onlinerl_dataset_getitem(dataset, tokenizer): assert "prompt" in item assert "multi_modal_data" in item assert "image" in item['multi_modal_data'] - assert isinstance(item['prompt'],str) + assert isinstance(item['prompt'], str) diff --git a/ding/worker/collector/tests/test_vllm_collector_multi_new.py b/ding/worker/collector/tests/test_vllm_collector_multi_new.py index e61ac8fe2e..affb35a5e9 100644 --- a/ding/worker/collector/tests/test_vllm_collector_multi_new.py +++ b/ding/worker/collector/tests/test_vllm_collector_multi_new.py @@ -16,7 +16,8 @@ class VllmActor: - def __init__(self, model_path: str,mm_processor_kwargs: dict,free_gpus:list) -> None: + + def __init__(self, model_path: str, mm_processor_kwargs: dict, free_gpus: list) -> None: """ Overview: Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. @@ -29,7 +30,7 @@ def __init__(self, model_path: str,mm_processor_kwargs: dict,free_gpus:list) -> # Set CUDA_VISIBLE_DEVICES to use only free GPUs os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus)) self.model_path = model_path - self.mm_processor_kwargs=mm_processor_kwargs + self.mm_processor_kwargs = mm_processor_kwargs self._initialize() def _initialize(self) -> None: @@ -68,7 +69,7 @@ async def generate(self, prompt, num_samples: int, max_tokens: int, temperature: max_tokens=max_tokens, temperature=temperature, ) - + # Using async iterator to handle vLLM's generation process # 1. vLLM's generate method is asynchronous to prevent blocking while waiting for model outputs # 2. async for allows streaming the generated outputs incrementally instead of waiting for all results @@ -134,7 +135,9 @@ def __init__(self, cfg: EasyDict) -> None: extra_input_keys=cfg.extra_input_keys ) - self._model = VllmActor(model_path=cfg.model_path, mm_processor_kwargs=cfg.mm_processor_kwargs,free_gpus=cfg.free_gpus) + self._model = VllmActor( + model_path=cfg.model_path, mm_processor_kwargs=cfg.mm_processor_kwargs, free_gpus=cfg.free_gpus + ) self.reset() def reset(self) -> None: @@ -159,6 +162,7 @@ def reset_env(self, _env: Optional[Any] = None) -> None: Since LLM generation does not require a explicit policy and env, this function is empty. """ pass + async def _generate_for_prompt(self, prompt: str, num_samples_per_prompt: int) -> List[Tuple[str, float]]: return await self._model.generate( prompt=prompt, @@ -166,6 +170,7 @@ async def _generate_for_prompt(self, prompt: str, num_samples_per_prompt: int) - max_tokens=self._cfg.max_tokens, temperature=self._cfg.temperature ) + def collect( self, n_samples: int = 100, @@ -185,11 +190,11 @@ def collect( if self._model is None: raise RuntimeError("Model not initialized. Call `reset` method first.") - prompts=[] + prompts = [] for id in self._index[:n_samples]: prompts.append(self._dataset[id]) # recusively update the index - self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + self._index = np.concatenate((self._index[n_samples:], self._index[:n_samples])) self._envstep += n_samples @@ -211,7 +216,7 @@ def collect( responses = {prompt["prompt"]: result for prompt, result in zip(prompts, results)} return responses - + def sync_collect( self, n_samples: int = 100, @@ -231,11 +236,11 @@ def sync_collect( if self._model is None: raise RuntimeError("Model not initialized. 
Call `reset` method first.") - prompts=[] + prompts = [] for id in self._index[:n_samples]: prompts.append(self._dataset[id]) # recusively update the index - self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + self._index = np.concatenate((self._index[n_samples:], self._index[:n_samples])) self._envstep += n_samples @@ -249,7 +254,7 @@ def sync_collect( # Run the async generate method in the event loop results = {} for prompt in prompts: - # Run the async generate method in the event loop for each prompt + # Run the async generate method in the event loop for each prompt result = loop.run_until_complete( self._model.generate( prompt=prompt, @@ -260,8 +265,8 @@ def sync_collect( ) results[prompt['prompt']] = result - return results - + return results + def collect_prompts( self, n_samples: int = 100, @@ -281,11 +286,11 @@ def collect_prompts( if self._model is None: raise RuntimeError("Model not initialized. Call `reset` method first.") - prompts=[] + prompts = [] for id in self._index[:n_samples]: prompts.append(self._dataset[id]) # recusively update the index - self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + self._index = np.concatenate((self._index[n_samples:], self._index[:n_samples])) self._envstep += n_samples @@ -298,19 +303,17 @@ def collect_prompts( # Run the async generate method in the event loop results = {} - tasks=[] + tasks = [] for prompt in prompts: - for _ in range(num_samples_per_prompt): - # Run the async generate method in the event loop for each prompt + for _ in range(num_samples_per_prompt): + # Run the async generate method in the event loop for each prompt tasks.append(self._generate_for_prompt(prompt, num_samples_per_prompt=1)) results_list = loop.run_until_complete(asyncio.gather(*tasks)) - for i,prompt in enumerate(prompts): - results[prompt['prompt']]=[] - for result in results_list[i*num_samples_per_prompt:(i+1)*num_samples_per_prompt]: + for i, prompt in enumerate(prompts): + results[prompt['prompt']] = [] + for result in results_list[i * num_samples_per_prompt:(i + 1) * num_samples_per_prompt]: results[prompt['prompt']].append(result.outputs[0].text) - return results - - + return results @property def envstep(self) -> int: @@ -343,10 +346,8 @@ def __del__(self) -> None: Destructor for the collector. 
""" self.close() - - - - + + def get_free_gpus() -> List[int]: """ Overview: @@ -370,7 +371,8 @@ def get_free_gpus() -> List[int]: except Exception: logger.warning("Failed to get GPU stats, defaulting to GPU 0") return [0] - + + def chunk_list(original_list, t): # chunk a list into sub_lists new_list = [original_list[i:i + t] for i in range(0, len(original_list), t)] @@ -380,10 +382,12 @@ def chunk_list(original_list, t): # prepare dataset IMG_START_TOKEN = '<|vision_start|>' IMG_END_TOKEN = '<|vision_end|>' -PLACE_HOLDER='<|image_pad|>' +PLACE_HOLDER = '<|image_pad|>' + + def dataset(num=None): # Load the dataset - hf_dataset = load_dataset("/mnt/afs/wangqijian/data/rlhf_dataset_test/VL-RewardBench",split='test') + hf_dataset = load_dataset("/mnt/afs/wangqijian/data/rlhf_dataset_test/VL-RewardBench", split='test') hf_dataset0 = hf_dataset.map( lambda x: { "query": f"{IMG_START_TOKEN}{PLACE_HOLDER}{IMG_END_TOKEN}{x['query']}", @@ -395,8 +399,8 @@ def dataset(num=None): if num is None: return hf_dataset else: - ret_data=[] - for i in range(0,num): + ret_data = [] + for i in range(0, num): ret_data.append(hf_dataset[i]) return ret_data @@ -407,7 +411,7 @@ def run_vllm_collector(config): os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids collector = VllmCollector(config) # 实例化模型 #ret=collector.collect(n_samples=2,num_samples_per_prompt=4) - ret=collector.collect(n_samples=2,num_samples_per_prompt=4) + ret = collector.collect(n_samples=2, num_samples_per_prompt=4) return ret @@ -418,17 +422,18 @@ def start_collector(config): results = run_vllm_collector(config) return results -def main(tot_dataset, free_gpus,config): - num_tot=len(tot_dataset) - num_gpu=len(free_gpus) - num_per_gpu=num_tot//num_gpu - prompts_per_gpu=chunk_list(tot_dataset,num_per_gpu) + +def main(tot_dataset, free_gpus, config): + num_tot = len(tot_dataset) + num_gpu = len(free_gpus) + num_per_gpu = num_tot // num_gpu + prompts_per_gpu = chunk_list(tot_dataset, num_per_gpu) with concurrent.futures.ProcessPoolExecutor(max_workers=len(free_gpus)) as executor: futures = [] - for gpu_id,prompts_gpu in zip(free_gpus,prompts_per_gpu): - config_per_gpu=copy.deepcopy(config) - config_per_gpu.dataset=prompts_gpu - config_per_gpu.free_gpus=[gpu_id] + for gpu_id, prompts_gpu in zip(free_gpus, prompts_per_gpu): + config_per_gpu = copy.deepcopy(config) + config_per_gpu.dataset = prompts_gpu + config_per_gpu.free_gpus = [gpu_id] futures.append(executor.submit(start_collector, config_per_gpu)) # collect all results @@ -442,43 +447,40 @@ def main(tot_dataset, free_gpus,config): print(response) for prompt in list(response.keys()): f.write(f"{prompt}:\n") - for i,output in enumerate(response[prompt].outputs): + for i, output in enumerate(response[prompt].outputs): f.write(f'output_{i}:\n') f.write(f"{output.text}\n") - - -test_dataset=dataset(num=16) -free_gpus=get_free_gpus() -config = EasyDict( - # (str) LLM/VLM model path - model_path='/mnt/afs/share/Qwen2-VL-7B', - # (int) Maximum number of tokens to generate per request - max_tokens=4096, - # (float) Temperature for sampling, 0 means greedy decoding - temperature=1.0, - # (dict) Multimodal processor kwargs for vision-language models - mm_processor_kwargs={ - "min_pixels": 28 * 28, - "max_pixels": 1280 * 28 * 28, - },# defaul set to align with Qwen2-VL-7B - # Dataset related configs - # dataset=test_dataset, - # dataset is defined for each gpu respectively - # (str) Key to access the input data in the dataset - input_key='query', - # (bool) Whether to apply a chat template to the input - 
apply_chat_template=True, - # (str) Template for the input - input_template=None, - # (bool) Whether to shuffle the dataset - shuffle=True, - extra_input_keys=['image'], - # free_gpus is defined for each gpu respectively - # save_path is the file to store the output - save_path="your_save_path" - ) - - -main(test_dataset,free_gpus,config) \ No newline at end of file +test_dataset = dataset(num=16) +free_gpus = get_free_gpus() +config = EasyDict( + # (str) LLM/VLM model path + model_path='/mnt/afs/share/Qwen2-VL-7B', + # (int) Maximum number of tokens to generate per request + max_tokens=4096, + # (float) Temperature for sampling, 0 means greedy decoding + temperature=1.0, + # (dict) Multimodal processor kwargs for vision-language models + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + }, # defaul set to align with Qwen2-VL-7B + # Dataset related configs + # dataset=test_dataset, + # dataset is defined for each gpu respectively + # (str) Key to access the input data in the dataset + input_key='query', + # (bool) Whether to apply a chat template to the input + apply_chat_template=True, + # (str) Template for the input + input_template=None, + # (bool) Whether to shuffle the dataset + shuffle=True, + extra_input_keys=['image'], + # free_gpus is defined for each gpu respectively + # save_path is the file to store the output + save_path="your_save_path" +) + +main(test_dataset, free_gpus, config) diff --git a/ding/worker/collector/vllm_collector.py b/ding/worker/collector/vllm_collector.py index d1c086410c..ca37a74039 100644 --- a/ding/worker/collector/vllm_collector.py +++ b/ding/worker/collector/vllm_collector.py @@ -233,6 +233,7 @@ def reset_env(self, _env: Optional[Any] = None) -> None: Since LLM generation does not require a explicit policy and env, this function is empty. """ pass + async def _generate_for_prompt(self, prompt: str, num_samples_per_prompt: int) -> List[Tuple[str, float]]: return await self._model.generate( prompt=prompt, @@ -240,6 +241,7 @@ async def _generate_for_prompt(self, prompt: str, num_samples_per_prompt: int) - max_tokens=self._cfg.max_tokens, temperature=self._cfg.temperature ) + def collect( self, n_samples: int = 100, @@ -259,11 +261,11 @@ def collect( if self._model is None: raise RuntimeError("Model not initialized. Call `reset` method first.") - prompts=[] + prompts = [] for id in self._index[:n_samples]: prompts.append(self._dataset[id]) # recusively update the index - self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + self._index = np.concatenate((self._index[n_samples:], self._index[:n_samples])) self._envstep += n_samples @@ -285,7 +287,7 @@ def collect( responses = {prompt["prompt"]: result for prompt, result in zip(prompts, results)} return responses - + def sync_collect( self, n_samples: int = 100, @@ -305,11 +307,11 @@ def sync_collect( if self._model is None: raise RuntimeError("Model not initialized. 
Call `reset` method first.") - prompts=[] + prompts = [] for id in self._index[:n_samples]: prompts.append(self._dataset[id]) # recusively update the index - self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + self._index = np.concatenate((self._index[n_samples:], self._index[:n_samples])) self._envstep += n_samples @@ -323,7 +325,7 @@ def sync_collect( # Run the async generate method in the event loop results = {} for prompt in prompts: - # Run the async generate method in the event loop for each prompt + # Run the async generate method in the event loop for each prompt result = loop.run_until_complete( self._model.generate( prompt=prompt, @@ -334,8 +336,8 @@ def sync_collect( ) results[prompt['prompt']] = result - return results - + return results + def collect_prompts( self, n_samples: int = 100, @@ -355,11 +357,11 @@ def collect_prompts( if self._model is None: raise RuntimeError("Model not initialized. Call `reset` method first.") - prompts=[] + prompts = [] for id in self._index[:n_samples]: prompts.append(self._dataset[id]) # recusively update the index - self._index = np.concatenate((self._index[n_samples:],self._index[:n_samples])) + self._index = np.concatenate((self._index[n_samples:], self._index[:n_samples])) self._envstep += n_samples @@ -372,19 +374,17 @@ def collect_prompts( # Run the async generate method in the event loop results = {} - tasks=[] + tasks = [] for prompt in prompts: - for _ in range(num_samples_per_prompt): - # Run the async generate method in the event loop for each prompt + for _ in range(num_samples_per_prompt): + # Run the async generate method in the event loop for each prompt tasks.append(self._generate_for_prompt(prompt, num_samples_per_prompt=1)) results_list = loop.run_until_complete(asyncio.gather(*tasks)) - for i,prompt in enumerate(prompts): - results[prompt['prompt']]=[] - for result in results_list[i*4:(i+1)*4]: + for i, prompt in enumerate(prompts): + results[prompt['prompt']] = [] + for result in results_list[i * 4:(i + 1) * 4]: results[prompt['prompt']].append(result.outputs[0].text) - return results - - + return results @property def envstep(self) -> int: From 092e5145627b32eb767c57a1efab7470f01e626d Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Mon, 10 Feb 2025 08:07:00 +0000 Subject: [PATCH 16/18] formatted --- .flake8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.flake8 b/.flake8 index 8b176d3853..9d86ca5e8c 100644 --- a/.flake8 +++ b/.flake8 @@ -1,4 +1,4 @@ [flake8] ignore=F401,F841,F403,E226,E126,W504,E265,E722,W503,W605,E741,E122,E731 max-line-length=120 - +statistics From eda18c5c88df39d4e4e2b0c5af87c73aa4488cda Mon Sep 17 00:00:00 2001 From: wqj2004 <2285705435@qq.com> Date: Mon, 10 Feb 2025 08:16:14 +0000 Subject: [PATCH 17/18] formatted --- ding/utils/data/rlhf_online_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ding/utils/data/rlhf_online_dataset.py b/ding/utils/data/rlhf_online_dataset.py index 08f6838d61..0386fca534 100644 --- a/ding/utils/data/rlhf_online_dataset.py +++ b/ding/utils/data/rlhf_online_dataset.py @@ -53,7 +53,7 @@ def __init__( self.prompts.append(processed_data['prompt']) #maybe can be imporved later for key in extra_input_keys: - getattr(self, key).append(processed_data[key]) + getattr(self, key).append(processed_data[key]) # self.prompts=np.array(self.prompts) # for key in extra_input_keys: # setattr(self, key, np.array(getattr(self,key))) From 81a1016454cad1b3f74e32f86bab0a954ed772d9 Mon Sep 17 00:00:00 2001 
From: wqj2004 <2285705435@qq.com> Date: Tue, 18 Feb 2025 09:08:35 +0000 Subject: [PATCH 18/18] added pytest --- .../collector/tests/test_vllm_collector.py | 187 +++++-- .../tests/test_vllm_collector_multi_new.py | 529 ++++-------------- .../tests/test_vllm_collector_multigpu.py | 276 +++------ ding/worker/collector/vllm_collector.py | 77 ++- .../halfcheetah_medium_expert_iql_config.py | 1 - .../config/halfcheetah_medium_iql_config.py | 1 - .../halfcheetah_medium_replay_iql_config.py | 1 - .../config/hopper_medium_expert_iql_config.py | 1 - dizoo/d4rl/config/hopper_medium_iql_config.py | 1 - .../config/hopper_medium_replay_iql_config.py | 1 - 10 files changed, 373 insertions(+), 702 deletions(-) diff --git a/ding/worker/collector/tests/test_vllm_collector.py b/ding/worker/collector/tests/test_vllm_collector.py index d8b7beaf93..ca210bdae7 100644 --- a/ding/worker/collector/tests/test_vllm_collector.py +++ b/ding/worker/collector/tests/test_vllm_collector.py @@ -1,15 +1,13 @@ from typing import List, Tuple, Optional -import os -import uuid -from loguru import logger -from ..vllm_collector import HuggingFaceModelGenerator +from ding.worker.collector.vllm_collector import HuggingFaceModelGenerator, get_free_gpus from vllm.assets.image import ImageAsset from enum import Enum +from datasets import load_dataset import asyncio -import nest_asyncio -# set a temperature > 0 to get multiple responses -# note that HFModelGenerator has a parameter "mm_processor_kwargs" set to align with the settings of Qwen in default -model = HuggingFaceModelGenerator('/mnt/afs/share/Qwen2-VL-7B', temperature=0.5) +from PIL import Image +import os +import concurrent.futures +import pytest class Modality(Enum): @@ -18,6 +16,24 @@ class Modality(Enum): VIDEO = "video" +def chunk_list(original_list: List, t: int): + # chunk a list into sub_lists + # base length of sublists + base_length = len(original_list) // t + # remaind length of some sub_lists + remainder = len(original_list) % t + new_list = [] + index = 0 + for i in range(t): + if i < remainder: + sublist_length = base_length + 1 + else: + sublist_length = base_length + new_list.append(original_list[index:index + sublist_length]) + index += sublist_length + return new_list + + def get_prompts_qwen(questions: list, modality: Modality) -> Tuple[List[str], Optional[List[int]]]: if modality == Modality.IMAGE: placeholder = "<|image_pad|>" @@ -50,9 +66,14 @@ def get_multi_modal_input(modality: Modality, filenames: list, questions: list) # Input image and question ret = {'data': [], 'question': []} for filename, question in zip(filenames, questions): - image = ImageAsset(filename) \ + if isinstance(filename, str): + image = ImageAsset(filename) \ .pil_image.convert("RGB") #img_question = "What is the content of this image?" + elif isinstance(filename, Image.Image): + image = filename + else: + raise ValueError(f"Unsupported type in filenames: {type(filename)}") img_question = question ret["data"].append(image) ret["question"].append(img_question) @@ -62,38 +83,132 @@ def get_multi_modal_input(modality: Modality, filenames: list, questions: list) return ret -questions = [ - "What is the content of this image?", "Please describe the image.", - "How many people are there in the image? What are they doing?" 
-] -img_names = [ - '/mnt/afs/niuyazhe/data/meme/data/Eimages/Eimages/Eimages/image_ (2)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3127)', '/mnt/afs/wangqijian/data/test/test' -] - -num_prompts = len(questions) -image_repeat_prob = None - -modality = Modality.IMAGE - -mm_input = get_multi_modal_input(modality, img_names, questions) -data = mm_input["data"] -question = mm_input["question"] -prompts, stop_token_ids = get_prompts_qwen(question, modality) - -nest_asyncio.apply() - - -async def main(): - inputs = [{"prompt": prompt, "multi_modal_data": {modality.value: data}} for prompt, data in zip(prompts, data)] +# -----------------testing single gpu vllm_actor -------------------------------- +async def single_main(model_path: str, gpu: list, temperature: float, modality: str, prompts: list, data: list): + # note that HFModelGenerator has a parameter + # "mm_processor_kwargs" set to align with the settings of Qwen in default + model = HuggingFaceModelGenerator(model_path=model_path, free_gpus=gpu, temperature=temperature) + inputs = [{"prompt": prompt, "multi_modal_data": {modality: data}} for prompt, data in zip(prompts, data)] # generate responses + response_ret = [] for in_data in inputs: responses = await model.generate(prompt=in_data, num_samples=3) # print response + response_per_prompt = [] for response, confidence in responses: - print(f"Response: {response}") + response_per_prompt.append(response) + response_ret.append(response_per_prompt) + return response_ret # run main -if __name__ == "__main__": - asyncio.run(main()) +@pytest.mark.unittest +def test_single_main(): + # set a temperature > 0 to get multiple responses + free_gpus = get_free_gpus() + model_path = 'Qwen/Qwen2-VL-7B' + temperature = 0.5 + questions = [] + img_names = [] + sample_num = 4 + hf_dataset = load_dataset("MMInstruction/VL-RewardBench", split='test') + for i in range(sample_num): + img_names.append(hf_dataset[i]["image"]) + questions.append(hf_dataset[i]["query"]) + assert len(img_names) == len(questions) + modality = Modality.IMAGE + mm_input = get_multi_modal_input(modality, img_names, questions) + data = mm_input["data"] + question = mm_input["question"] + prompts, stop_token_ids = get_prompts_qwen(question, modality) + responses = asyncio.run( + single_main( + model_path=model_path, + gpu=[free_gpus[0]], + temperature=temperature, + modality=modality.value, + prompts=prompts, + data=data + ) + ) + assert len(responses) == len(questions) + + +# -----------------testing multi gpu vllm_actor -------------------------------- +async def run_vllm_collector(gpu_list: list, prompts: List, model_path: str, temperature: float) -> List[str]: + # set visible gpu + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_list)) + # get a model on a single gpu + model = HuggingFaceModelGenerator(model_path, free_gpus=gpu_list, temperature=temperature) + + # get response for each prompts (can be improved later using async generation) + responses_list = [] + for prompt in prompts: + responses = await model.generate(prompt, num_samples=3) + for response in responses: + responses_list.append(response) + #print(f"[GPU {gpu_list}] Response: {response}") + + return responses_list + + +def start_collector(gpu_list: list, prompts: list, model_path: str, temperature: float) -> List[str]: + # event loop in a process + results = asyncio.run(run_vllm_collector(gpu_list, prompts, model_path, temperature)) + return results + + +def multi_main( + prompts: list, model_path: str, free_gpus: List[int], 
temperature: float, num_per_gpus_collector: int +) -> None: + # solve how mant collectors to use + num_collector = len(free_gpus) // num_per_gpus_collector + # slove how many gpus a collector should use + gpus_per_collector = chunk_list(free_gpus, num_collector) + # split input_prompts to collectors equally + prompts_per_gpu = chunk_list(prompts, num_collector) + with concurrent.futures.ProcessPoolExecutor(max_workers=num_collector) as executor: + futures = [] + for gpu_list, prompts_gpu in zip(gpus_per_collector, prompts_per_gpu): + futures.append(executor.submit(start_collector, gpu_list, prompts_gpu, model_path, temperature)) + + # get all results + all_results = [] + for future in concurrent.futures.as_completed(futures): + all_results.append(future.result()) + + return all_results + + +@pytest.mark.unittest +def test_multi_main(): + # get dataset + hf_dataset = load_dataset("MMInstruction/VL-RewardBench", split='test') + img_names = [] + questions = [] + num = 16 + for i in range(num): + img_names.append(hf_dataset[i]["image"]) + questions.append(hf_dataset[i]["query"]) + assert len(img_names) == len(questions) + #get gpus + free_gpus = get_free_gpus() + # set modality + modality = Modality.IMAGE + # get input + mm_input = get_multi_modal_input(modality, img_names, questions) + data = mm_input["data"] + question = mm_input["question"] + # get prompts + prompts, stop_token_ids = get_prompts_qwen(question, modality) + # set necessary parameters + model_path = 'Qwen/Qwen2-VL-7B' + temperature = 0.5 + num_gpus_per_collector = 1 + assert len(free_gpus) >= num_gpus_per_collector + # set inputs + inputs = [{"prompt": prompt, "multi_modal_data": {modality.value: data}} for prompt, data in zip(prompts, data)] + # get results + result = multi_main(inputs, model_path, free_gpus, temperature, num_gpus_per_collector) + # default num_smaples is 3, can be modified in line 93 + assert len(result) == len(questions) diff --git a/ding/worker/collector/tests/test_vllm_collector_multi_new.py b/ding/worker/collector/tests/test_vllm_collector_multi_new.py index affb35a5e9..0a255d0624 100644 --- a/ding/worker/collector/tests/test_vllm_collector_multi_new.py +++ b/ding/worker/collector/tests/test_vllm_collector_multi_new.py @@ -1,381 +1,29 @@ from transformers import AutoTokenizer from typing import List, Tuple, Optional, Any import os -import uuid -import asyncio -import numpy as np -from loguru import logger from easydict import EasyDict -from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput -from ding.utils import SERIAL_COLLECTOR_REGISTRY -from ding.worker.collector.base_serial_collector import ISerialCollector from datasets import load_dataset -from ding.utils.data import OnlineRLDataset +from ding.worker.collector.vllm_collector import VllmCollector, get_free_gpus import copy import concurrent.futures +import pytest -class VllmActor: - - def __init__(self, model_path: str, mm_processor_kwargs: dict, free_gpus: list) -> None: - """ - Overview: - Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. - Arguments: - - model_path (str): The path to the language model. 
- """ - self.free_gpus = free_gpus - self.num_gpus = len(self.free_gpus) - assert self.num_gpus > 0, "No GPUs found" - # Set CUDA_VISIBLE_DEVICES to use only free GPUs - os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus)) - self.model_path = model_path - self.mm_processor_kwargs = mm_processor_kwargs - self._initialize() - - def _initialize(self) -> None: - """ - Overview: - Initialize the vLLM actor with a series of arguments. - """ - logger.info("Initializing vLLM") - # TODO: Try other options in https://docs.vllm.ai/en/stable/models/engine_args.html#engine-args. - engine_args = AsyncEngineArgs( - model=self.model_path, - tensor_parallel_size=self.num_gpus, - max_num_batched_tokens=8192, - max_model_len=8192, - # enable_chunked_prefill=True, - max_num_seqs=5, - # Note - mm_processor_kwargs can also be passed to generate/chat calls - mm_processor_kwargs=self.mm_processor_kwargs, - ) - self.engine = AsyncLLMEngine.from_engine_args(engine_args) - - async def generate(self, prompt, num_samples: int, max_tokens: int, temperature: float = 0) -> RequestOutput: - """ - Overview: - Generate tactics for the current state. - Arguments: - - prompt : The prompt to generate tactics. - - num_samples (int): The number of tactics to generate. - - max_tokens (int): The maximum number of tokens to generate. - - temperature (float): The temperature for the language model, default to 0. - Returns: - - RequestOutput: The generated tactics and their log-probabilities. - """ - sampling_params = SamplingParams( - n=num_samples, - max_tokens=max_tokens, - temperature=temperature, - ) - - # Using async iterator to handle vLLM's generation process - # 1. vLLM's generate method is asynchronous to prevent blocking while waiting for model outputs - # 2. async for allows streaming the generated outputs incrementally instead of waiting for all results - # 3. This approach is particularly suitable for LLM inference which can be time-consuming - # 4. The request_id ensures unique identification for each generation request - async for oup in self.engine.generate( - prompt, sampling_params, request_id=str(uuid.uuid4().hex) - ): - final_output = oup - return final_output - - -@SERIAL_COLLECTOR_REGISTRY.register('vllm') -class VllmCollector(ISerialCollector): - """ - Overview: - Collector implementation for vLLM-based language models (LLM/VLM). - This collector manages the interaction with vLLM models for text generation tasks. - """ - config = dict( - # (str) LLM/VLM model path - model_path='', - # (int) Maximum number of tokens to generate per request - max_tokens=1024, - # (float) Temperature for sampling, 0 means greedy decoding - temperature=0.0, - # (dict) Multimodal processor kwargs for vision-language models - mm_processor_kwargs={ - "min_pixels": 28 * 28, - "max_pixels": 1280 * 28 * 28, - }, - # Dataset related configs - # (str) Key to access the input data in the dataset - input_key='input', - # (bool) Whether to apply a chat template to the input - apply_chat_template=False, - # (str) Template for the input - input_template=None, - # (bool) Whether to shuffle the dataset - shuffle=True, - ) - - def __init__(self, cfg: EasyDict) -> None: - """ - Overview: - Initialize the VllmCollector with configuration. 
- Arguments: - - cfg (:obj:`EasyDict`): Configuration for the collector including model path, generation parameters, - and dataset configuration - """ - super().__init__() - self._cfg = cfg - self._envstep = 0 - - # Initialize the tokenizer and dataset - self._tokenizer = AutoTokenizer.from_pretrained(cfg.model_path) - self._dataset = OnlineRLDataset( - dataset=cfg.dataset, - tokenizer=self._tokenizer, - input_key=cfg.input_key, - apply_chat_template=cfg.apply_chat_template, - input_template=cfg.input_template, - extra_input_keys=cfg.extra_input_keys - ) - - self._model = VllmActor( - model_path=cfg.model_path, mm_processor_kwargs=cfg.mm_processor_kwargs, free_gpus=cfg.free_gpus - ) - self.reset() - - def reset(self) -> None: - """ - Overview: - Reset the collector, including the dataset index. - """ - self._index = np.arange(len(self._dataset)) - if self._cfg.shuffle: - np.random.shuffle(self._index) - - def reset_policy(self, _model: Optional[str] = None) -> None: - """ - Overview: - Since LLM generation does not require a explicit policy and env, this function is empty. - """ - pass - - def reset_env(self, _env: Optional[Any] = None) -> None: - """ - Overview: - Since LLM generation does not require a explicit policy and env, this function is empty. - """ - pass - - async def _generate_for_prompt(self, prompt: str, num_samples_per_prompt: int) -> List[Tuple[str, float]]: - return await self._model.generate( - prompt=prompt, - num_samples=num_samples_per_prompt, - max_tokens=self._cfg.max_tokens, - temperature=self._cfg.temperature - ) - - def collect( - self, - n_samples: int = 100, - num_samples_per_prompt: int = 1, - train_iter: int = 0, - ) -> List[Tuple[str, float]]: - """ - Overview: - Collect generated responses from the vLLM model. - Arguments: - - n_samples (:obj:`int`): Number of prompts to generate. - - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt. - - train_iter (:obj:`int`): Current training iteration, used for logging. - Returns: - - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs - """ - if self._model is None: - raise RuntimeError("Model not initialized. Call `reset` method first.") - - prompts = [] - for id in self._index[:n_samples]: - prompts.append(self._dataset[id]) - # recusively update the index - self._index = np.concatenate((self._index[n_samples:], self._index[:n_samples])) - - self._envstep += n_samples - - # Get the current event loop or create a new one - try: - loop = asyncio.get_event_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - # Run the async generate method in the event loop - # Create a list of tasks for each prompt - tasks = [self._generate_for_prompt(prompt, num_samples_per_prompt) for prompt in prompts] - - # Run all tasks concurrently and collect results - results = loop.run_until_complete(asyncio.gather(*tasks)) - - # Map prompts to their corresponding results - responses = {prompt["prompt"]: result for prompt, result in zip(prompts, results)} - - return responses - - def sync_collect( - self, - n_samples: int = 100, - num_samples_per_prompt: int = 1, - train_iter: int = 0, - ) -> List[Tuple[str, float]]: - """ - Overview: - Collect generated responses from the vLLM model. - Arguments: - - n_samples (:obj:`int`): Number of prompts to generate. - - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt. - - train_iter (:obj:`int`): Current training iteration, used for logging. 
- Returns: - - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs - """ - if self._model is None: - raise RuntimeError("Model not initialized. Call `reset` method first.") - - prompts = [] - for id in self._index[:n_samples]: - prompts.append(self._dataset[id]) - # recusively update the index - self._index = np.concatenate((self._index[n_samples:], self._index[:n_samples])) - - self._envstep += n_samples - - # Get the current event loop or create a new one - try: - loop = asyncio.get_event_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - # Run the async generate method in the event loop - results = {} - for prompt in prompts: - # Run the async generate method in the event loop for each prompt - result = loop.run_until_complete( - self._model.generate( - prompt=prompt, - num_samples=num_samples_per_prompt, - max_tokens=self._cfg.max_tokens, - temperature=self._cfg.temperature - ) - ) - results[prompt['prompt']] = result - - return results - - def collect_prompts( - self, - n_samples: int = 100, - num_samples_per_prompt: int = 1, - train_iter: int = 0, - ) -> List[Tuple[str, float]]: - """ - Overview: - Collect generated responses from the vLLM model. - Arguments: - - n_samples (:obj:`int`): Number of prompts to generate. - - num_samples_per_prompt (:obj:`int`): Number of samples to generate per prompt. - - train_iter (:obj:`int`): Current training iteration, used for logging. - Returns: - - responses (:obj:`List[Tuple[str, float]]`): List of (generated_text, confidence_score) pairs - """ - if self._model is None: - raise RuntimeError("Model not initialized. Call `reset` method first.") - - prompts = [] - for id in self._index[:n_samples]: - prompts.append(self._dataset[id]) - # recusively update the index - self._index = np.concatenate((self._index[n_samples:], self._index[:n_samples])) - - self._envstep += n_samples - - # Get the current event loop or create a new one - try: - loop = asyncio.get_event_loop() - except RuntimeError: - loop = asyncio.new_event_loop() - asyncio.set_event_loop(loop) - - # Run the async generate method in the event loop - results = {} - tasks = [] - for prompt in prompts: - for _ in range(num_samples_per_prompt): - # Run the async generate method in the event loop for each prompt - tasks.append(self._generate_for_prompt(prompt, num_samples_per_prompt=1)) - results_list = loop.run_until_complete(asyncio.gather(*tasks)) - for i, prompt in enumerate(prompts): - results[prompt['prompt']] = [] - for result in results_list[i * num_samples_per_prompt:(i + 1) * num_samples_per_prompt]: - results[prompt['prompt']].append(result.outputs[0].text) - return results - - @property - def envstep(self) -> int: - """ - Overview: - Get the current environment step count. - Returns: - - count (:obj:`int`): Current environment step count - """ - return self._envstep - - @envstep.setter - def envstep(self, value: int) -> None: - """ - Overview: - Set the current environment step count. - """ - self._envstep = value - - def close(self) -> None: - """ - Overview: - Close the collector. - """ - pass - - def __del__(self) -> None: - """ - Overview: - Destructor for the collector. - """ - self.close() - - -def get_free_gpus() -> List[int]: - """ - Overview: - Get IDs of GPUs with free memory. - Returns: - - List[int]: The IDs of the free GPUs. 
- """ - try: - # Get GPU memory usage using nvidia-smi - gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader')\ - .readlines() - free_gpus = [] - - for gpu_id, stats in enumerate(gpu_stats): - mem_used, mem_total = map(int, stats.strip().split(',')) - # Consider GPU as free if less than 5% memory is used - if mem_used / mem_total < 0.05: - free_gpus.append(gpu_id) - - return free_gpus if free_gpus else [0] # Default to GPU 0 if no free GPUs found - except Exception: - logger.warning("Failed to get GPU stats, defaulting to GPU 0") - return [0] - - -def chunk_list(original_list, t): +def chunk_list(original_list: List, t: int) -> List[List]: # chunk a list into sub_lists - new_list = [original_list[i:i + t] for i in range(0, len(original_list), t)] + # base length of sublists + base_length = len(original_list) // t + # remaind length of some sub_lists + remainder = len(original_list) % t + new_list = [] + index = 0 + for i in range(t): + if i < remainder: + sublist_length = base_length + 1 + else: + sublist_length = base_length + new_list.append(original_list[index:index + sublist_length]) + index += sublist_length return new_list @@ -385,9 +33,9 @@ def chunk_list(original_list, t): PLACE_HOLDER = '<|image_pad|>' -def dataset(num=None): +def dataset(num: int = None) -> List: # Load the dataset - hf_dataset = load_dataset("/mnt/afs/wangqijian/data/rlhf_dataset_test/VL-RewardBench", split='test') + hf_dataset = load_dataset("MMInstruction/VL-RewardBench", split='test') hf_dataset0 = hf_dataset.map( lambda x: { "query": f"{IMG_START_TOKEN}{PLACE_HOLDER}{IMG_END_TOKEN}{x['query']}", @@ -405,17 +53,24 @@ def dataset(num=None): return ret_data -def run_vllm_collector(config): +def run_vllm_collector(config: EasyDict) -> List[dict]: + ''' + ret:[ + { + "prompt_i":output([output_text_0,output_text_1,...,]) + } + ] + ''' # set GPU for current process gpu_ids = ",".join(map(str, config.free_gpus)) os.environ["CUDA_VISIBLE_DEVICES"] = gpu_ids - collector = VllmCollector(config) # 实例化模型 + collector = VllmCollector(config) #ret=collector.collect(n_samples=2,num_samples_per_prompt=4) - ret = collector.collect(n_samples=2, num_samples_per_prompt=4) + ret = collector.collect(n_samples=config.n_samples, num_samples_per_prompt=config.num_samples_per_prompt) return ret -def start_collector(config): +def start_collector(config: EasyDict): # collect within the process # results:a dict, basic form: #{"prompt_0":[ans_0,ans_1,...,ans_n],"prompt_1":[ans_0,ans_1,...,ans_n],...} @@ -423,64 +78,84 @@ def start_collector(config): return results -def main(tot_dataset, free_gpus, config): - num_tot = len(tot_dataset) - num_gpu = len(free_gpus) - num_per_gpu = num_tot // num_gpu - prompts_per_gpu = chunk_list(tot_dataset, num_per_gpu) - with concurrent.futures.ProcessPoolExecutor(max_workers=len(free_gpus)) as executor: +def multi_vllm_main(tot_dataset, free_gpus: list, config: EasyDict): + ''' + tot_dataset: the total dataset to process + free_gpus: list of total gpus available for the task + config: user defined config about how to do the task + ''' + num_gpu_per_collector = config.num_gpus_per_collector + # how many collector to use + num_collector = len(free_gpus) // num_gpu_per_collector + # list of list, each list contains the gpus the collecor can use + gpu_per_collector = chunk_list(free_gpus, num_collector) + prompts_per_gpu = chunk_list(tot_dataset, num_collector) + with concurrent.futures.ProcessPoolExecutor(max_workers=num_collector) as executor: futures = [] - 
for gpu_id, prompts_gpu in zip(free_gpus, prompts_per_gpu): + for gpu_list, prompts_per_collector in zip(gpu_per_collector, prompts_per_gpu): config_per_gpu = copy.deepcopy(config) - config_per_gpu.dataset = prompts_gpu - config_per_gpu.free_gpus = [gpu_id] + config_per_gpu.dataset = prompts_per_collector + config_per_gpu.free_gpus = gpu_list + #config_per_gpu.n_samples = len(prompts_per_collector) + config_per_gpu.n_samples = 2 futures.append(executor.submit(start_collector, config_per_gpu)) # collect all results all_results = [] for future in concurrent.futures.as_completed(futures): all_results.append(future.result()) - - # save results - with open(config.save_path, "w") as f: - for response in all_results: - print(response) - for prompt in list(response.keys()): - f.write(f"{prompt}:\n") - for i, output in enumerate(response[prompt].outputs): - f.write(f'output_{i}:\n') - f.write(f"{output.text}\n") - - -test_dataset = dataset(num=16) -free_gpus = get_free_gpus() -config = EasyDict( - # (str) LLM/VLM model path - model_path='/mnt/afs/share/Qwen2-VL-7B', - # (int) Maximum number of tokens to generate per request - max_tokens=4096, - # (float) Temperature for sampling, 0 means greedy decoding - temperature=1.0, - # (dict) Multimodal processor kwargs for vision-language models - mm_processor_kwargs={ - "min_pixels": 28 * 28, - "max_pixels": 1280 * 28 * 28, - }, # defaul set to align with Qwen2-VL-7B - # Dataset related configs - # dataset=test_dataset, - # dataset is defined for each gpu respectively - # (str) Key to access the input data in the dataset - input_key='query', - # (bool) Whether to apply a chat template to the input - apply_chat_template=True, - # (str) Template for the input - input_template=None, - # (bool) Whether to shuffle the dataset - shuffle=True, - extra_input_keys=['image'], - # free_gpus is defined for each gpu respectively - # save_path is the file to store the output - save_path="your_save_path" -) - -main(test_dataset, free_gpus, config) + return all_results + + # # save results + # with open(config.save_path, "w") as f: + # for response in all_results: + # #print(response) + # for prompt in list(response.keys()): + # f.write(f"{prompt}:\n") + # for i, output in enumerate(response[prompt].outputs): + # f.write(f'output_{i}:\n') + # f.write(f"{output.text}\n") + + +@pytest.mark.unittest +def test_multi_vllm(): + test_dataset = dataset(num=16) + free_gpus = get_free_gpus() + config = EasyDict( + # (str) LLM/VLM model path + model_path='Qwen/Qwen2-VL-7B', + # (int) Maximum number of tokens to generate per request + max_tokens=4096, + # (float) Temperature for sampling, 0 means greedy decoding + temperature=1.0, + # (dict) Multimodal processor kwargs for vision-language models + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + }, # defaul set to align with Qwen2-VL-7B + # Dataset related configs + # dataset=test_dataset, + # dataset is defined for each gpu respectively + # (str) Key to access the input data in the dataset + input_key='query', + # (bool) Whether to apply a chat template to the input + apply_chat_template=True, + # (str) Template for the input + input_template=None, + # (bool) Whether to shuffle the dataset + shuffle=True, + extra_input_keys=['image'], + # free_gpus is defined for each gpu respectively + # save_path is the file to store the output + save_path="your_path", + # how many gpus a collector can use + num_gpus_per_collector=1, + num_samples_per_prompt=4 + ) + result = multi_vllm_main(test_dataset, free_gpus, 
config) + collector_num = len(free_gpus) // config.num_gpus_per_collector + assert len(result) == collector_num + for response in result: + prompts = list(response.keys()) + for prompt in prompts: + assert config.num_samples_per_prompt == len(response[prompt].outputs) diff --git a/ding/worker/collector/tests/test_vllm_collector_multigpu.py b/ding/worker/collector/tests/test_vllm_collector_multigpu.py index 1c7d79e1bb..966171f523 100644 --- a/ding/worker/collector/tests/test_vllm_collector_multigpu.py +++ b/ding/worker/collector/tests/test_vllm_collector_multigpu.py @@ -1,161 +1,30 @@ from typing import List, Tuple, Optional import os -import uuid -from loguru import logger -from vllm import AsyncLLMEngine, AsyncEngineArgs, SamplingParams, RequestOutput from vllm.assets.image import ImageAsset from enum import Enum +from ding.worker.collector.vllm_collector import HuggingFaceModelGenerator, get_free_gpus +from PIL import Image +from datasets import load_dataset import concurrent.futures import asyncio - - -class VllmActor: - - def __init__(self, model_path: str, mm_processor_kwargs: dict, free_gpus: list) -> None: - """ - Overview: - Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. - Arguments: - - model_path (str): The path to the language model. - """ - self.free_gpus = free_gpus - self.num_gpus = len(self.free_gpus) - assert self.num_gpus > 0, "No GPUs found" - # Set CUDA_VISIBLE_DEVICES to use only free GPUs - os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, self.free_gpus)) - self.model_path = model_path - self.mm_processor_kwargs = mm_processor_kwargs - self._initialize() - - def _initialize(self) -> None: - """ - Overview: - Initialize the vLLM actor with a series of arguments. - """ - logger.info("Initializing vLLM") - # TODO: Try other options in https://docs.vllm.ai/en/stable/models/engine_args.html#engine-args. - engine_args = AsyncEngineArgs( - model=self.model_path, - tensor_parallel_size=self.num_gpus, - max_num_batched_tokens=8192, - max_model_len=8192, - # enable_chunked_prefill=True, - max_num_seqs=5, - # Note - mm_processor_kwargs can also be passed to generate/chat calls - mm_processor_kwargs=self.mm_processor_kwargs, - ) - self.engine = AsyncLLMEngine.from_engine_args(engine_args) - - async def generate(self, prompt, num_samples: int, max_tokens: int, temperature: float = 0) -> RequestOutput: - """ - Overview: - Generate tactics for the current state. - Arguments: - - prompt : The prompt to generate tactics. - - num_samples (int): The number of tactics to generate. - - max_tokens (int): The maximum number of tokens to generate. - - temperature (float): The temperature for the language model, default to 0. - Returns: - - RequestOutput: The generated tactics and their log-probabilities. - """ - sampling_params = SamplingParams( - n=num_samples, - max_tokens=max_tokens, - temperature=temperature, - ) - - # Using async iterator to handle vLLM's generation process - # 1. vLLM's generate method is asynchronous to prevent blocking while waiting for model outputs - # 2. async for allows streaming the generated outputs incrementally instead of waiting for all results - # 3. This approach is particularly suitable for LLM inference which can be time-consuming - # 4. 
The request_id ensures unique identification for each generation request - async for oup in self.engine.generate( - prompt, sampling_params, request_id=str(uuid.uuid4().hex) - ): - final_output = oup - return final_output - - -class HuggingFaceModelGenerator: - """ - Overview: - A LLM/VLM generator that uses Hugging Face models with vLLM as the backend. - """ - - def __init__( - self, - model_path: str, - free_gpus: list, - max_tokens: int = 1024, - temperature: float = 0, - mm_processor_kwargs: dict = { - "min_pixels": 28 * 28, - "max_pixels": 1280 * 28 * 28, - } - ) -> None: - """ - Overview: - Initialize the Hugging Face model generator. - Arguments: - - model_path (str): The path to the language model. - - max_tokens (int): The maximum number of tokens to generate, default to 1024. - - temperature (float): The temperature for the language model, default to 0. - """ - self.vllm_actor = VllmActor(model_path, mm_processor_kwargs, free_gpus) - self.max_tokens = max_tokens - self.temperature = temperature - - async def generate( - self, - prompt, - num_samples: int, - ) -> List[Tuple[str, float]]: - """ - Overview: - Generate tactics for the current state. - Arguments: - - prompt : The prompt to generate tactics. - - num_samples (int): The number of tactics to generate. - Returns: - - List[Tuple[str, float]]: The generated tactics and their log-probabilities. - - .. note:: - This method is asynchronous and returns a coroutine. - """ - response = await self.vllm_actor.generate(prompt, num_samples, self.max_tokens, self.temperature) - # Use raw logprobs as confidence scores - confidence_scores = [x.cumulative_logprob for x in response.outputs] - return [(x.text.strip(), conf) for x, conf in zip(response.outputs, confidence_scores)] - - -def get_free_gpus() -> List[int]: - """ - Overview: - Get IDs of GPUs with free memory. - Returns: - - List[int]: The IDs of the free GPUs. 
- """ - try: - # Get GPU memory usage using nvidia-smi - gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader')\ - .readlines() - free_gpus = [] - - for gpu_id, stats in enumerate(gpu_stats): - mem_used, mem_total = map(int, stats.strip().split(',')) - # Consider GPU as free if less than 5% memory is used - if mem_used / mem_total < 0.05: - free_gpus.append(gpu_id) - - return free_gpus if free_gpus else [0] # Default to GPU 0 if no free GPUs found - except Exception: - logger.warning("Failed to get GPU stats, defaulting to GPU 0") - return [0] - - -def chunk_list(original_list: list, t: int) -> List[list]: - # chunk the list into sub_lists - new_list = [original_list[i:i + t] for i in range(0, len(original_list), t)] +import pytest + + +def chunk_list(original_list: List, t: int): + # chunk a list into sub_lists + # base length of sublists + base_length = len(original_list) // t + # remaind length of some sub_lists + remainder = len(original_list) % t + new_list = [] + index = 0 + for i in range(t): + if i < remainder: + sublist_length = base_length + 1 + else: + sublist_length = base_length + new_list.append(original_list[index:index + sublist_length]) + index += sublist_length return new_list @@ -197,9 +66,14 @@ def get_multi_modal_input(modality: Modality, filenames: list, questions: list) # Input image and question ret = {'data': [], 'question': []} for filename, question in zip(filenames, questions): - image = ImageAsset(filename) \ + if isinstance(filename, str): + image = ImageAsset(filename) \ .pil_image.convert("RGB") #img_question = "What is the content of this image?" + elif isinstance(filename, Image.Image): + image = filename + else: + raise ValueError(f"Unsupported type in filenames: {type(filename)}") img_question = question ret["data"].append(image) ret["question"].append(img_question) @@ -209,84 +83,78 @@ def get_multi_modal_input(modality: Modality, filenames: list, questions: list) return ret -async def run_vllm_collector(gpu_id: int, prompts: List, model_path: str, temperature: float) -> List[str]: +async def run_vllm_collector(gpu_list: list, prompts: List, model_path: str, temperature: float) -> List[str]: # set visible gpu - os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_id) + os.environ["CUDA_VISIBLE_DEVICES"] = ",".join(map(str, gpu_list)) # get a model on a single gpu - model = HuggingFaceModelGenerator(model_path, free_gpus=[gpu_id], temperature=temperature) + model = HuggingFaceModelGenerator(model_path, free_gpus=gpu_list, temperature=temperature) + # get response for each prompts (can be improved later using async generation) responses_list = [] for prompt in prompts: responses = await model.generate(prompt, num_samples=3) for response in responses: responses_list.append(response) - print(f"[GPU {gpu_id}] Response: {response}") + #print(f"[GPU {gpu_list}] Response: {response}") return responses_list -def start_collector(gpu_id: int, prompts: list, model_path: str, temperature: float) -> List[str]: +def start_collector(gpu_list: list, prompts: list, model_path: str, temperature: float) -> List[str]: # event loop in a process - results = asyncio.run(run_vllm_collector(gpu_id, prompts, model_path, temperature)) + results = asyncio.run(run_vllm_collector(gpu_list, prompts, model_path, temperature)) return results -def main(prompts: list, model_path: str, free_gpus: List[int], temperature: float) -> None: - num_tot = len(prompts) - num_gpu = len(free_gpus) - num_per_gpu = num_tot // num_gpu - prompts_per_gpu = 
chunk_list(prompts, num_per_gpu) - with concurrent.futures.ProcessPoolExecutor(max_workers=len(free_gpus)) as executor: +def main(prompts: list, model_path: str, free_gpus: List[int], temperature: float, num_per_gpus_collector: int) -> None: + # solve how mant collectors to use + num_collector = len(free_gpus) // num_per_gpus_collector + # slove how many gpus a collector should use + gpus_per_collector = chunk_list(free_gpus, num_collector) + # split input_prompts to collectors equally + prompts_per_gpu = chunk_list(prompts, num_collector) + with concurrent.futures.ProcessPoolExecutor(max_workers=num_collector) as executor: futures = [] - for gpu_id, prompts_gpu in zip(free_gpus, prompts_per_gpu): - futures.append(executor.submit(start_collector, gpu_id, prompts_gpu, model_path, temperature)) + for gpu_list, prompts_gpu in zip(gpus_per_collector, prompts_per_gpu): + futures.append(executor.submit(start_collector, gpu_list, prompts_gpu, model_path, temperature)) # get all results all_results = [] for future in concurrent.futures.as_completed(futures): all_results.append(future.result()) - # save results - with open("/mnt/afs/wangqijian/tests/vllm_multi_gpu.txt", "w") as f: - for responses in all_results: - for response in responses: - f.write(f"{response}\n") - - -if __name__ == "__main__": - questions = [ - 'Please describe the image.', 'Please describe the image.', 'What\'s the text in the image?', - 'What\'s the text in the image?', 'What is in the image?', 'What is in the image?', - 'How many people are in the image?', 'How many people are in the image?', - 'What is the emotion of the main character of the image?', - 'What is the emotion of the main character of the image?', 'How many animals are in the image?', - 'How many animals are in the image?', 'What is the place of the image?', 'What is the place of the image?', - 'What is the peroson doing?', 'What is the peroson doing?' 
- ] - img_names = [ - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2127)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5394)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1160)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4956)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2212)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(3387)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4086)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4384)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5000)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(1237)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(766)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(6031)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(6)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(2284)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(4533)', - '/mnt/afs/niuyazhe/data/meme/data/Cimages/Cimages/Cimages/Image_(5495)' - ] + return all_results + + +@pytest.mark.unittest +def test_main(): + # get dataset + hf_dataset = load_dataset("MMInstruction/VL-RewardBench", split='test') + img_names = [] + questions = [] + num = 16 + for i in range(num): + img_names.append(hf_dataset[i]["image"]) + questions.append(hf_dataset[i]["query"]) + assert len(img_names) == len(questions) + #get gpus free_gpus = get_free_gpus() + # set modality modality = Modality.IMAGE + # get input mm_input = get_multi_modal_input(modality, img_names, questions) data = mm_input["data"] question = mm_input["question"] + # get prompts prompts, stop_token_ids = get_prompts_qwen(question, modality) - model_path = '/mnt/afs/share/Qwen2-VL-7B' + # set necessary parameters + model_path = 'Qwen/Qwen2-VL-7B' temperature = 0.5 - main(prompts, model_path, free_gpus, temperature) + num_gpus_per_collector = 1 + assert len(free_gpus) >= num_gpus_per_collector + # set inputs + inputs = [{"prompt": prompt, "multi_modal_data": {modality.value: data}} for prompt, data in zip(prompts, data)] + # get results + result = main(inputs, model_path, free_gpus, temperature, num_gpus_per_collector) + # default num_smaples is 3, can be modified in line 93 + assert len(result) == len(questions) diff --git a/ding/worker/collector/vllm_collector.py b/ding/worker/collector/vllm_collector.py index ca37a74039..eefe35c33b 100644 --- a/ding/worker/collector/vllm_collector.py +++ b/ding/worker/collector/vllm_collector.py @@ -13,16 +13,46 @@ from .base_serial_collector import ISerialCollector +def get_free_gpus() -> List[int]: + """ + Overview: + Get IDs of GPUs with free memory. + Returns: + - List[int]: The IDs of the free GPUs. 
+ """ + try: + # Get GPU memory usage using nvidia-smi + gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader')\ + .readlines() + free_gpus = [] + + for gpu_id, stats in enumerate(gpu_stats): + mem_used, mem_total = map(int, stats.strip().split(',')) + # Consider GPU as free if less than 5% memory is used + if mem_used / mem_total < 0.05: + free_gpus.append(gpu_id) + + return free_gpus if free_gpus else [0] # Default to GPU 0 if no free GPUs found + except Exception: + logger.warning("Failed to get GPU stats, defaulting to GPU 0") + return [0] + + class VllmActor: - def __init__(self, model_path: str, mm_processor_kwargs: dict) -> None: + def __init__(self, model_path: str, mm_processor_kwargs: dict, free_gpus: list = None) -> None: """ Overview: Initialize the vLLM actor. For more details, please refer to https://docs.vllm.ai/en/stable. Arguments: - model_path (str): The path to the language model. + - mm_processor_kwargs(dict): Multimodal processor kwargs for vision-language models + - free_gpus(list): gpus for the model """ - self.free_gpus = self.get_free_gpus() + if free_gpus is None: + self.free_gpus = get_free_gpus() + else: + self.free_gpus = free_gpus self.num_gpus = len(self.free_gpus) assert self.num_gpus > 0, "No GPUs found" # Set CUDA_VISIBLE_DEVICES to use only free GPUs @@ -31,30 +61,6 @@ def __init__(self, model_path: str, mm_processor_kwargs: dict) -> None: self.mm_processor_kwargs = mm_processor_kwargs self._initialize() - def get_free_gpus(self) -> List[int]: - """ - Overview: - Get IDs of GPUs with free memory. - Returns: - - List[int]: The IDs of the free GPUs. - """ - try: - # Get GPU memory usage using nvidia-smi - gpu_stats = os.popen('nvidia-smi --query-gpu=memory.used,memory.total --format=csv,nounits,noheader')\ - .readlines() - free_gpus = [] - - for gpu_id, stats in enumerate(gpu_stats): - mem_used, mem_total = map(int, stats.strip().split(',')) - # Consider GPU as free if less than 5% memory is used - if mem_used / mem_total < 0.05: - free_gpus.append(gpu_id) - - return free_gpus if free_gpus else [0] # Default to GPU 0 if no free GPUs found - except Exception: - logger.warning("Failed to get GPU stats, defaulting to GPU 0") - return [0] - def _initialize(self) -> None: """ Overview: @@ -113,6 +119,7 @@ class HuggingFaceModelGenerator: def __init__( self, model_path: str, + free_gpus: list, max_tokens: int = 1024, temperature: float = 0, mm_processor_kwargs: dict = { @@ -128,7 +135,7 @@ def __init__( - max_tokens (int): The maximum number of tokens to generate, default to 1024. - temperature (float): The temperature for the language model, default to 0. """ - self.vllm_actor = VllmActor(model_path, mm_processor_kwargs) + self.vllm_actor = VllmActor(model_path, mm_processor_kwargs, free_gpus) self.max_tokens = max_tokens self.temperature = temperature @@ -208,7 +215,9 @@ def __init__(self, cfg: EasyDict) -> None: extra_input_keys=cfg.extra_input_keys ) - self._model = VllmActor(model_path=cfg.model_path, mm_processor_kwargs=cfg.mm_processor_kwargs) + self._model = VllmActor( + model_path=cfg.model_path, mm_processor_kwargs=cfg.mm_processor_kwargs, free_gpus=cfg.free_gpus + ) self.reset() def reset(self) -> None: @@ -235,6 +244,16 @@ def reset_env(self, _env: Optional[Any] = None) -> None: pass async def _generate_for_prompt(self, prompt: str, num_samples_per_prompt: int) -> List[Tuple[str, float]]: + """ + Overview: + Generate response for the prompt. + Arguments: + - prompt(str) : The prompt to generate tactics. 
+ - num_samples_per_prompt (int): The number of tactics to generate. + Returns: + - List[Tuple[str, float]]: The generated tactics and their log-probabilities. + + """ return await self._model.generate( prompt=prompt, num_samples=num_samples_per_prompt, @@ -382,7 +401,7 @@ def collect_prompts( results_list = loop.run_until_complete(asyncio.gather(*tasks)) for i, prompt in enumerate(prompts): results[prompt['prompt']] = [] - for result in results_list[i * 4:(i + 1) * 4]: + for result in results_list[i * num_samples_per_prompt:(i + 1) * num_samples_per_prompt]: results[prompt['prompt']].append(result.outputs[0].text) return results diff --git a/dizoo/d4rl/config/halfcheetah_medium_expert_iql_config.py b/dizoo/d4rl/config/halfcheetah_medium_expert_iql_config.py index 144feac1dd..e3aa855afe 100644 --- a/dizoo/d4rl/config/halfcheetah_medium_expert_iql_config.py +++ b/dizoo/d4rl/config/halfcheetah_medium_expert_iql_config.py @@ -18,7 +18,6 @@ model=dict( obs_shape=17, action_shape=6, - ), learn=dict( data_path=None, diff --git a/dizoo/d4rl/config/halfcheetah_medium_iql_config.py b/dizoo/d4rl/config/halfcheetah_medium_iql_config.py index 545ecf970b..440525a320 100644 --- a/dizoo/d4rl/config/halfcheetah_medium_iql_config.py +++ b/dizoo/d4rl/config/halfcheetah_medium_iql_config.py @@ -18,7 +18,6 @@ model=dict( obs_shape=17, action_shape=6, - ), learn=dict( data_path=None, diff --git a/dizoo/d4rl/config/halfcheetah_medium_replay_iql_config.py b/dizoo/d4rl/config/halfcheetah_medium_replay_iql_config.py index d48a1fb472..0974735b72 100644 --- a/dizoo/d4rl/config/halfcheetah_medium_replay_iql_config.py +++ b/dizoo/d4rl/config/halfcheetah_medium_replay_iql_config.py @@ -18,7 +18,6 @@ model=dict( obs_shape=17, action_shape=6, - ), learn=dict( data_path=None, diff --git a/dizoo/d4rl/config/hopper_medium_expert_iql_config.py b/dizoo/d4rl/config/hopper_medium_expert_iql_config.py index 6aef029c5e..2eebce2771 100644 --- a/dizoo/d4rl/config/hopper_medium_expert_iql_config.py +++ b/dizoo/d4rl/config/hopper_medium_expert_iql_config.py @@ -18,7 +18,6 @@ model=dict( obs_shape=11, action_shape=3, - ), learn=dict( data_path=None, diff --git a/dizoo/d4rl/config/hopper_medium_iql_config.py b/dizoo/d4rl/config/hopper_medium_iql_config.py index 8f429be268..61dbb5fac3 100644 --- a/dizoo/d4rl/config/hopper_medium_iql_config.py +++ b/dizoo/d4rl/config/hopper_medium_iql_config.py @@ -18,7 +18,6 @@ model=dict( obs_shape=11, action_shape=3, - ), learn=dict( data_path=None, diff --git a/dizoo/d4rl/config/hopper_medium_replay_iql_config.py b/dizoo/d4rl/config/hopper_medium_replay_iql_config.py index ad1b222843..df96a84aea 100644 --- a/dizoo/d4rl/config/hopper_medium_replay_iql_config.py +++ b/dizoo/d4rl/config/hopper_medium_replay_iql_config.py @@ -18,7 +18,6 @@ model=dict( obs_shape=11, action_shape=3, - ), learn=dict( data_path=None,
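
A minimal, self-contained sketch of the multi-collector pattern exercised by the new tests: partition free GPUs and prompts with a chunk_list helper, start one collector per GPU group in its own process, and run async generation inside each process with asyncio.run. This is an illustration under stated assumptions, not part of the patch: FakeGenerator, run_collector, start_collector and multi_collect are hypothetical stand-ins for HuggingFaceModelGenerator / VllmCollector and the per-GPU startup shown above, so the sketch runs with the standard library alone, without vLLM or GPUs.

import asyncio
import concurrent.futures
from typing import List


def chunk_list(original_list: List, t: int) -> List[List]:
    # split a list into t nearly equal sublists; the first len(original_list) % t get one extra item
    base, remainder = divmod(len(original_list), t)
    chunks, index = [], 0
    for i in range(t):
        size = base + 1 if i < remainder else base
        chunks.append(original_list[index:index + size])
        index += size
    return chunks


class FakeGenerator:
    # stand-in for HuggingFaceModelGenerator: echoes the prompt num_samples times

    def __init__(self, gpu_ids: List[int]) -> None:
        self.gpu_ids = gpu_ids

    async def generate(self, prompt: str, num_samples: int) -> List[str]:
        await asyncio.sleep(0)  # pretend to await the inference engine
        return [f"[gpus {self.gpu_ids}] answer {i} to: {prompt}" for i in range(num_samples)]


async def run_collector(gpu_ids: List[int], prompts: List[str]) -> List[str]:
    # one generator per collector, responses gathered prompt by prompt
    model = FakeGenerator(gpu_ids)
    outputs = []
    for prompt in prompts:
        outputs.extend(await model.generate(prompt, num_samples=3))
    return outputs


def start_collector(gpu_ids: List[int], prompts: List[str]) -> List[str]:
    # one event loop per worker process, mirroring start_collector in the tests
    return asyncio.run(run_collector(gpu_ids, prompts))


def multi_collect(prompts: List[str], free_gpus: List[int], gpus_per_collector: int = 1) -> List[List[str]]:
    # decide how many collectors fit on the available GPUs, then split GPUs and prompts evenly
    num_collectors = len(free_gpus) // gpus_per_collector
    gpu_groups = chunk_list(free_gpus, num_collectors)
    prompt_groups = chunk_list(prompts, num_collectors)
    with concurrent.futures.ProcessPoolExecutor(max_workers=num_collectors) as executor:
        futures = [
            executor.submit(start_collector, gpus, chunk)
            for gpus, chunk in zip(gpu_groups, prompt_groups)
        ]
        return [f.result() for f in concurrent.futures.as_completed(futures)]


if __name__ == "__main__":
    results = multi_collect([f"prompt {i}" for i in range(8)], free_gpus=[0, 1], gpus_per_collector=1)
    assert len(results) == 2  # one result list per collector process
    for batch in results:
        print(len(batch), "responses")

In the actual tests, free_gpus comes from the nvidia-smi based get_free_gpus, each collector sets CUDA_VISIBLE_DEVICES to its GPU group before constructing the model, and num_samples per prompt is configurable; the process-pool and chunking logic is otherwise the same as sketched here.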