diff --git a/openmanus_rl/__init__.py b/openmanus_rl/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/openmanus_rl/__pycache__/__init__.cpython-310.pyc b/openmanus_rl/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 00000000..9b17d985 Binary files /dev/null and b/openmanus_rl/__pycache__/__init__.cpython-310.pyc differ diff --git a/openmanus_rl/__pycache__/configs.cpython-310.pyc b/openmanus_rl/__pycache__/configs.cpython-310.pyc new file mode 100644 index 00000000..f8dce9d3 Binary files /dev/null and b/openmanus_rl/__pycache__/configs.cpython-310.pyc differ diff --git a/openmanus_rl/__pycache__/sft.cpython-310.pyc b/openmanus_rl/__pycache__/sft.cpython-310.pyc new file mode 100644 index 00000000..490c693e Binary files /dev/null and b/openmanus_rl/__pycache__/sft.cpython-310.pyc differ diff --git a/openmanus_rl/__pycache__/utils.cpython-310.pyc b/openmanus_rl/__pycache__/utils.cpython-310.pyc new file mode 100644 index 00000000..5b14d5e3 Binary files /dev/null and b/openmanus_rl/__pycache__/utils.cpython-310.pyc differ diff --git a/openmanus-rl/configs.py b/openmanus_rl/configs.py similarity index 100% rename from openmanus-rl/configs.py rename to openmanus_rl/configs.py diff --git a/openmanus-rl/grpo.py b/openmanus_rl/grpo.py similarity index 100% rename from openmanus-rl/grpo.py rename to openmanus_rl/grpo.py diff --git a/openmanus-rl/rewards.py b/openmanus_rl/rewards.py similarity index 100% rename from openmanus-rl/rewards.py rename to openmanus_rl/rewards.py diff --git a/openmanus-rl/sft.py b/openmanus_rl/sft.py similarity index 100% rename from openmanus-rl/sft.py rename to openmanus_rl/sft.py diff --git a/openmanus-rl/utils.py b/openmanus_rl/utils.py similarity index 97% rename from openmanus-rl/utils.py rename to openmanus_rl/utils.py index 1b99d586..59077727 100644 --- a/openmanus-rl/utils.py +++ b/openmanus_rl/utils.py @@ -3,7 +3,7 @@ from transformers import AutoTokenizer, PreTrainedTokenizer from trl import ModelConfig -from ..configs import GRPOConfig, SFTConfig +from .configs import GRPOConfig, SFTConfig DEFAULT_CHAT_TEMPLATE = "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}"