Commit a706b5b

feat(models): add qwen3 0.6b variants
1 parent dd8e0dc commit a706b5b

10 files changed, +355 −9 lines changed

docs/docs/overview/quickstart/finetune_guide.md

Lines changed: 26 additions & 9 deletions

@@ -42,15 +42,32 @@ Start by loading the instruction dataset and initializing the model of your choice.
 
 <Test instruction={'Instruction'}/>
 
-A list of all the supported models can be found [here](/overview/supported_models).
-
-
-
-Next, we need to start the fine-tuning
-
-```python
-model.finetune(dataset=instruction_dataset)
-```
+A list of all the supported models can be found [here](/overview/supported_models).
+
+
+
+Next, we need to start the fine-tuning
+
+```python
+model.finetune(dataset=instruction_dataset)
+```
+
+### Example: Qwen3 0.6B with LoRA
+
+```python
+from xturing.datasets import InstructionDataset
+from xturing.models import BaseModel
+
+instruction_dataset = InstructionDataset("/path/to/your/dataset")
+model = BaseModel.create("qwen3_0_6b_lora")
+model.finetune(dataset=instruction_dataset)
+```
+
+You can find a runnable script at `examples/models/qwen3/qwen3_lora_finetune.py`.
+
+```bash
+xturing finetune --model qwen3_0_6b_lora --data-dir /path/to/your/dataset
+```
 
 <!-- Finally, let us test how our fine-tuned model performs using the `.generate()` function.
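The commented-out section that follows in the guide covers testing the fine-tuned model with `.generate()`. A short, hedged continuation of the snippet above, using only the `generate` and `save` calls that appear in this commit's example script (the prompt text, dataset path, and output directory are illustrative):

```python
from xturing.datasets import InstructionDataset
from xturing.models import BaseModel

# Same setup as the guide's Qwen3 0.6B LoRA example.
instruction_dataset = InstructionDataset("/path/to/your/dataset")
model = BaseModel.create("qwen3_0_6b_lora")
model.finetune(dataset=instruction_dataset)

# Sanity-check the fine-tuned adapters, then persist them for later reuse.
output = model.generate(texts=["Why are smaller language models becoming popular?"])
print(f"Generated output: {output}")
model.save("./qwen3_lora_weights")
```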

docs/docs/overview/supported_models.md

Lines changed: 1 addition & 0 deletions

@@ -17,6 +17,7 @@ description: Models Supported by xTuring
 | GPT-2 | gpt2 |||||
 | LLaMA 7B | llama |||||
 | LLaMA2 | llama2 |||||
+| Qwen3 0.6B | qwen3_0_6b |||||
 | OPT 1.3B | opt |||||
 
 ### Memory-efficient versions
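The key in the table's second column is the string passed to `BaseModel.create`. A minimal sketch of how the Qwen3 variants registered by this commit are selected (all five keys come from the registry additions below; choose one depending on available memory):

```python
from xturing.models import BaseModel

# Full-precision base model.
model = BaseModel.create("qwen3_0_6b")

# Memory-efficient variants registered by this commit:
# model = BaseModel.create("qwen3_0_6b_lora")       # LoRA adapters
# model = BaseModel.create("qwen3_0_6b_int8")       # 8-bit weights
# model = BaseModel.create("qwen3_0_6b_lora_int8")  # LoRA + 8-bit weights
# model = BaseModel.create("qwen3_0_6b_lora_kbit")  # LoRA + k-bit quantization
```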
examples/models/qwen3/qwen3_lora_finetune.py

Lines changed: 34 additions & 0 deletions

@@ -0,0 +1,34 @@
+"""Minimal example showing how to fine-tune Qwen3-0.6B with LoRA using xTuring."""
+from pathlib import Path
+
+from xturing.datasets.instruction_dataset import InstructionDataset
+from xturing.models import BaseModel
+
+# Reuse the small Alpaca-style dataset that ships with the repo. Replace this path
+# with your own instruction dataset when running real experiments.
+DATASET_DIR = Path(__file__).parent.parent / "llama" / "alpaca_data"
+
+# Location where the LoRA adapters will be stored once training finishes.
+OUTPUT_DIR = Path(__file__).parent / "qwen3_lora_weights"
+
+
+def main():
+    instruction_dataset = InstructionDataset(str(DATASET_DIR))
+
+    # Initialize the Qwen3 0.6B model with a LoRA adapter head.
+    model = BaseModel.create("qwen3_0_6b_lora")
+
+    # Launch fine-tuning with the default configuration (see
+    # src/xturing/config/finetuning_config.yaml for the exact hyper-parameters).
+    model.finetune(dataset=instruction_dataset)
+
+    # Run a quick generation to sanity-check the adapter before saving.
+    output = model.generate(texts=["Why are smaller language models becoming popular?"])
+    print(f"Generated output: {output}")
+
+    # Persist the adapter and tokenizer so the run can be resumed or deployed later.
+    model.save(str(OUTPUT_DIR))
+    print(f"Saved fine-tuned weights to {OUTPUT_DIR}")
+
+if __name__ == "__main__":
+    main()

src/xturing/config/finetuning_config.yaml

Lines changed: 32 additions & 0 deletions

@@ -302,6 +302,38 @@ mamba:
   learning_rate: 5e-5
   weight_decay: 0.01
 
+qwen3_0_6b:
+  learning_rate: 5e-5
+  weight_decay: 0.01
+  num_train_epochs: 3
+
+qwen3_0_6b_lora:
+  learning_rate: 1e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 4
+
+qwen3_0_6b_int8:
+  learning_rate: 1e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 4
+  max_length: 256
+
+qwen3_0_6b_lora_int8:
+  learning_rate: 1e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 8
+  max_length: 256
+
+qwen3_0_6b_lora_kbit:
+  learning_rate: 1e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 4
+  max_length: 512
+
 opt:
   learning_rate: 5e-5
   weight_decay: 0.01
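These defaults are keyed by the same `config_name` strings used in the model registry. A hedged sketch of how to inspect them from a repository checkout (assumes PyYAML is installed and the relative path below matches your working directory):

```python
import yaml  # PyYAML

# Path is relative to the repository root; adjust if running from elsewhere.
with open("src/xturing/config/finetuning_config.yaml") as f:
    finetuning_defaults = yaml.safe_load(f)

# Print the hyper-parameters this commit registers for the LoRA variant.
# Expected keys, per the diff above: learning_rate, weight_decay, num_train_epochs, batch_size.
print(finetuning_defaults["qwen3_0_6b_lora"])
```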

src/xturing/config/generation_config.yaml

Lines changed: 29 additions & 0 deletions

@@ -316,6 +316,35 @@ llama2_lora_kbit:
 mamba:
   do_sample: false
 
+# Contrastive search
+qwen3_0_6b:
+  penalty_alpha: 0.6
+  top_k: 4
+  max_new_tokens: 256
+  do_sample: false
+
+# Contrastive search
+qwen3_0_6b_lora:
+  penalty_alpha: 0.6
+  top_k: 4
+  max_new_tokens: 256
+  do_sample: false
+
+# Greedy search
+qwen3_0_6b_int8:
+  max_new_tokens: 256
+  do_sample: false
+
+# Greedy search
+qwen3_0_6b_lora_int8:
+  max_new_tokens: 256
+  do_sample: false
+
+# Greedy search
+qwen3_0_6b_lora_kbit:
+  max_new_tokens: 256
+  do_sample: false
+
 # Contrastive search
 opt:
   penalty_alpha: 0.6
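The full-precision and LoRA variants default to contrastive search, while the quantized variants use greedy decoding. These keys map directly to Hugging Face `generate()` arguments; a hedged sketch of the equivalent call made directly with `transformers` (xTuring applies these settings internally, and the prompt is illustrative):

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-0.6B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", trust_remote_code=True)

inputs = tokenizer("Why are smaller language models becoming popular?", return_tensors="pt")

# Contrastive search, mirroring the qwen3_0_6b generation defaults above.
outputs = model.generate(
    **inputs,
    penalty_alpha=0.6,
    top_k=4,
    max_new_tokens=256,
    do_sample=False,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```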

src/xturing/engines/__init__.py

Lines changed: 12 additions & 0 deletions

@@ -71,6 +71,13 @@
     LlamaLoraKbitEngine,
 )
 from xturing.engines.mamba_engine import MambaEngine
+from xturing.engines.qwen_engine import (
+    Qwen3Engine,
+    Qwen3Int8Engine,
+    Qwen3LoraEngine,
+    Qwen3LoraInt8Engine,
+    Qwen3LoraKbitEngine,
+)
 from xturing.engines.opt_engine import (
     OPTEngine,
     OPTInt8Engine,

@@ -135,6 +142,11 @@
 BaseEngine.add_to_registry(LLama2LoraInt8Engine.config_name, LLama2LoraInt8Engine)
 BaseEngine.add_to_registry(LLama2LoraKbitEngine.config_name, LLama2LoraKbitEngine)
 BaseEngine.add_to_registry(MambaEngine.config_name, MambaEngine)
+BaseEngine.add_to_registry(Qwen3Engine.config_name, Qwen3Engine)
+BaseEngine.add_to_registry(Qwen3Int8Engine.config_name, Qwen3Int8Engine)
+BaseEngine.add_to_registry(Qwen3LoraEngine.config_name, Qwen3LoraEngine)
+BaseEngine.add_to_registry(Qwen3LoraInt8Engine.config_name, Qwen3LoraInt8Engine)
+BaseEngine.add_to_registry(Qwen3LoraKbitEngine.config_name, Qwen3LoraKbitEngine)
 BaseEngine.add_to_registry(OPTEngine.config_name, OPTEngine)
 BaseEngine.add_to_registry(OPTInt8Engine.config_name, OPTInt8Engine)
 BaseEngine.add_to_registry(OPTLoraEngine.config_name, OPTLoraEngine)
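The registrations above follow a simple registry pattern: each engine's `config_name` string is mapped to its class at import time so instances can later be built from string keys. A simplified, self-contained sketch of that mechanism (not xTuring's actual implementation, purely illustrative):

```python
from typing import Dict, Type


class BaseEngine:
    # Maps config_name strings to engine classes registered at import time.
    registry: Dict[str, Type["BaseEngine"]] = {}
    config_name: str = "base"

    @classmethod
    def add_to_registry(cls, name: str, engine_cls: Type["BaseEngine"]) -> None:
        cls.registry[name] = engine_cls

    @classmethod
    def create(cls, name: str, **kwargs) -> "BaseEngine":
        return cls.registry[name](**kwargs)


class Qwen3EngineStub(BaseEngine):
    config_name = "qwen3_0_6b_engine"


BaseEngine.add_to_registry(Qwen3EngineStub.config_name, Qwen3EngineStub)
engine = BaseEngine.create("qwen3_0_6b_engine")
print(type(engine).__name__)  # -> Qwen3EngineStub
```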

src/xturing/engines/qwen_engine.py

Lines changed: 90 additions & 0 deletions

@@ -0,0 +1,90 @@
+from pathlib import Path
+from typing import Optional, Union
+
+from xturing.engines.causal import CausalEngine, CausalLoraEngine, CausalLoraKbitEngine
+
+_DEFAULT_MODEL_NAME = "Qwen/Qwen3-0.6B"
+_TARGET_MODULES = [
+    "q_proj",
+    "k_proj",
+    "v_proj",
+    "o_proj",
+    "gate_proj",
+    "up_proj",
+    "down_proj",
+]
+
+
+class Qwen3Engine(CausalEngine):
+    config_name: str = "qwen3_0_6b_engine"
+
+    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
+        super().__init__(
+            model_name=_DEFAULT_MODEL_NAME,
+            weights_path=weights_path,
+            trust_remote_code=True,
+        )
+
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+
+
+class Qwen3LoraEngine(CausalLoraEngine):
+    config_name: str = "qwen3_0_6b_lora_engine"
+
+    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
+        super().__init__(
+            model_name=_DEFAULT_MODEL_NAME,
+            weights_path=weights_path,
+            target_modules=_TARGET_MODULES,
+            trust_remote_code=True,
+        )
+
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+
+
+class Qwen3Int8Engine(CausalEngine):
+    config_name: str = "qwen3_0_6b_int8_engine"
+
+    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
+        super().__init__(
+            model_name=_DEFAULT_MODEL_NAME,
+            weights_path=weights_path,
+            load_8bit=True,
+            trust_remote_code=True,
+        )
+
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+
+
+class Qwen3LoraInt8Engine(CausalLoraEngine):
+    config_name: str = "qwen3_0_6b_lora_int8_engine"
+
+    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
+        super().__init__(
+            model_name=_DEFAULT_MODEL_NAME,
+            weights_path=weights_path,
+            load_8bit=True,
+            target_modules=_TARGET_MODULES,
+            trust_remote_code=True,
+        )
+
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+
+
+class Qwen3LoraKbitEngine(CausalLoraKbitEngine):
+    config_name: str = "qwen3_0_6b_lora_kbit_engine"
+
+    def __init__(self, weights_path: Optional[Union[str, Path]] = None):
+        super().__init__(
+            model_name=_DEFAULT_MODEL_NAME,
+            weights_path=weights_path,
+            target_modules=_TARGET_MODULES,
+            trust_remote_code=True,
+        )
+
+        self.tokenizer.pad_token = self.tokenizer.eos_token
+        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
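The `_TARGET_MODULES` list covers every attention and MLP projection in the Qwen3 block. xTuring's `CausalLoraEngine` wires the adapters up internally; as a rough, hedged equivalent using the `peft` library directly (the rank and alpha values below are illustrative, not the values xTuring uses):

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-0.6B", trust_remote_code=True)

# Attach LoRA adapters to the same projections listed in _TARGET_MODULES.
lora_config = LoraConfig(
    r=8,            # illustrative rank
    lora_alpha=16,  # illustrative scaling factor
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(base_model, lora_config)
peft_model.print_trainable_parameters()
```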

src/xturing/models/__init__.py

Lines changed: 12 additions & 0 deletions

@@ -56,6 +56,13 @@
     Llama2LoraKbit,
 )
 from xturing.models.mamba import Mamba
+from xturing.models.qwen import (
+    Qwen3,
+    Qwen3Int8,
+    Qwen3Lora,
+    Qwen3LoraInt8,
+    Qwen3LoraKbit,
+)
 from xturing.models.opt import OPT, OPTInt8, OPTLora, OPTLoraInt8
 from xturing.models.stable_diffusion import StableDiffusion
 

@@ -112,6 +119,11 @@
 BaseModel.add_to_registry(Llama2LoraInt8.config_name, Llama2LoraInt8)
 BaseModel.add_to_registry(Llama2LoraKbit.config_name, Llama2LoraKbit)
 BaseModel.add_to_registry(Mamba.config_name, Mamba)
+BaseModel.add_to_registry(Qwen3.config_name, Qwen3)
+BaseModel.add_to_registry(Qwen3Int8.config_name, Qwen3Int8)
+BaseModel.add_to_registry(Qwen3Lora.config_name, Qwen3Lora)
+BaseModel.add_to_registry(Qwen3LoraInt8.config_name, Qwen3LoraInt8)
+BaseModel.add_to_registry(Qwen3LoraKbit.config_name, Qwen3LoraKbit)
 BaseModel.add_to_registry(OPT.config_name, OPT)
 BaseModel.add_to_registry(OPTInt8.config_name, OPTInt8)
 BaseModel.add_to_registry(OPTLora.config_name, OPTLora)

src/xturing/models/qwen.py

Lines changed: 51 additions & 0 deletions

@@ -0,0 +1,51 @@
+from typing import Optional
+
+from xturing.engines.qwen_engine import (
+    Qwen3Engine,
+    Qwen3Int8Engine,
+    Qwen3LoraEngine,
+    Qwen3LoraInt8Engine,
+    Qwen3LoraKbitEngine,
+)
+from xturing.models.causal import (
+    CausalInt8Model,
+    CausalLoraInt8Model,
+    CausalLoraKbitModel,
+    CausalLoraModel,
+    CausalModel,
+)
+
+
+class Qwen3(CausalModel):
+    config_name: str = "qwen3_0_6b"
+
+    def __init__(self, weights_path: Optional[str] = None):
+        super().__init__(Qwen3Engine.config_name, weights_path)
+
+
+class Qwen3Lora(CausalLoraModel):
+    config_name: str = "qwen3_0_6b_lora"
+
+    def __init__(self, weights_path: Optional[str] = None):
+        super().__init__(Qwen3LoraEngine.config_name, weights_path)
+
+
+class Qwen3Int8(CausalInt8Model):
+    config_name: str = "qwen3_0_6b_int8"
+
+    def __init__(self, weights_path: Optional[str] = None):
+        super().__init__(Qwen3Int8Engine.config_name, weights_path)
+
+
+class Qwen3LoraInt8(CausalLoraInt8Model):
+    config_name: str = "qwen3_0_6b_lora_int8"
+
+    def __init__(self, weights_path: Optional[str] = None):
+        super().__init__(Qwen3LoraInt8Engine.config_name, weights_path)
+
+
+class Qwen3LoraKbit(CausalLoraKbitModel):
+    config_name: str = "qwen3_0_6b_lora_kbit"
+
+    def __init__(self, weights_path: Optional[str] = None):
+        super().__init__(Qwen3LoraKbitEngine.config_name, weights_path)
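Each wrapper accepts an optional `weights_path`, which points the underlying engine at previously saved weights instead of the Hugging Face checkpoint. A brief, hedged sketch of creating a variant via the registry versus reloading a saved fine-tune (the directory is whatever was passed to `model.save()` earlier; exact reload behavior depends on the causal engine's weight-loading logic):

```python
from xturing.models import BaseModel
from xturing.models.qwen import Qwen3LoraKbit

# Fresh model from the Hugging Face checkpoint, resolved via the registry key.
model = BaseModel.create("qwen3_0_6b_lora_kbit")

# Or point the wrapper at weights produced by an earlier model.save("./qwen3_lora_weights").
resumed = Qwen3LoraKbit(weights_path="./qwen3_lora_weights")
output = resumed.generate(texts=["Summarize what LoRA adapters do."])
print(output)
```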
