# MiniMaxM2 Model Examples

This directory contains examples for using the MiniMaxM2 model from HuggingFace with xTuring.

## Model Information

- **Model**: MiniMaxAI/MiniMax-M2
- **Source**: [HuggingFace Model Hub](https://huggingface.co/MiniMaxAI/MiniMax-M2)

## Available Variants

The MiniMaxM2 model is available in multiple configurations:

1. **minimax_m2** - Base model
2. **minimax_m2_lora** - LoRA fine-tuning enabled
3. **minimax_m2_int8** - 8-bit quantized version
4. **minimax_m2_lora_int8** - LoRA with 8-bit quantization
5. **minimax_m2_lora_kbit** - LoRA with 4-bit quantization
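
Each key can be passed directly to `BaseModel.create()`, as the examples below show.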

## Usage Examples

### Basic Inference

```python
from xturing.models import BaseModel

# Create the model
model = BaseModel.create("minimax_m2")

# Generate text
output = model.generate(texts=["What is machine learning?"])
print(output)
```
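
`generate` takes a list of prompts via `texts` and returns one completion per prompt, so several inputs can be batched into a single call.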

### Fine-tuning with LoRA

```python
from xturing.datasets.instruction_dataset import InstructionDataset
from xturing.models import BaseModel

# Load dataset
dataset = InstructionDataset("path/to/your/dataset")

# Create model with LoRA
model = BaseModel.create("minimax_m2_lora")

# Fine-tune
model.finetune(dataset=dataset)

# Save
model.save("./minimax_m2_finetuned")
```
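
If you don't have a dataset saved on disk, an `InstructionDataset` can also be built from an in-memory dict following xTuring's instruction/text/target convention. A minimal sketch (the example strings are placeholders):

```python
from xturing.datasets.instruction_dataset import InstructionDataset

# One training example per list position:
# "instruction" is the task description, "text" the optional input,
# and "target" the expected output.
dataset = InstructionDataset({
    "instruction": ["Summarize the following text."],
    "text": ["Machine learning is a subfield of AI ..."],
    "target": ["ML is a branch of AI focused on learning from data."],
})
```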

### Memory-Efficient Inference

For machines with limited GPU memory, use one of the quantized variants:

```python
from xturing.models import BaseModel

# Use 8-bit quantization
model = BaseModel.create("minimax_m2_int8")

# Or use 4-bit quantization with LoRA
model = BaseModel.create("minimax_m2_lora_kbit")

output = model.generate(texts=["Your prompt here"])
```

## Files

- `minimax_m2_example.py` - Basic usage example
- `minimax_m2_finetune.py` - Fine-tuning example
- `README.md` - This file

## Configuration

The model uses the following default settings:

### Generation Config
- `max_new_tokens`: 512
- `temperature`: 0.1
- `penalty_alpha`: 0.6 (for contrastive search)
- `top_k`: 4

### Fine-tuning Config
- `learning_rate`: 2e-4 (LoRA variants)
- `num_train_epochs`: 3
- `max_length`: 2048
- `batch_size`: Varies by variant

These can be customized through the configuration files or when creating the model.
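
For example, using the config accessors that xTuring exposes on a model (a sketch; the attribute names assume the defaults listed above):

```python
from xturing.models import BaseModel

model = BaseModel.create("minimax_m2_lora")

# Tweak generation defaults before calling generate()
generation_config = model.generation_config()
generation_config.max_new_tokens = 256
generation_config.top_k = 10

# Tweak fine-tuning defaults before calling finetune()
finetuning_config = model.finetuning_config()
finetuning_config.num_train_epochs = 1
finetuning_config.learning_rate = 1e-4
```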

## Requirements

Make sure you have xTuring installed with all dependencies:

```bash
pip install xturing
```
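
For the quantized variants (`int8`, `kbit`), a CUDA-capable GPU is expected, since 8-bit and 4-bit weight loading is typically handled by `bitsandbytes`; check that it is present in your environment (this assumes the standard bitsandbytes-based quantization stack).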

## Notes

- The model requires `trust_remote_code=True` to load properly
- LoRA variants are recommended for fine-tuning, since they train only a small set of adapter parameters
- Quantized versions (int8, kbit) require less memory but may have slightly reduced accuracy