Commit 62d5367
feat: add comprehensive OpenAI GPT-OSS model support
Implements full support for OpenAI's GPT-OSS-120B and GPT-OSS-20B models with all variants:

- Base models (gpt_oss_120b, gpt_oss_20b)
- LoRA fine-tuning (gpt_oss_120b_lora, gpt_oss_20b_lora)
- INT8 quantization (gpt_oss_120b_int8, gpt_oss_20b_int8)
- LoRA + INT8 (gpt_oss_120b_lora_int8, gpt_oss_20b_lora_int8)
- LoRA + 4-bit (gpt_oss_120b_lora_kbit, gpt_oss_20b_lora_kbit)

Key features:

- OpenAI harmony response format support with custom chat templates
- Memory-optimized configurations (120B fits in 80GB, 20B fits in 16GB)
- Reasoning-tuned generation settings (512 tokens, temp=0.1)
- Production-ready fine-tuning hyperparameters
- Comprehensive test suite with real model validation

Files changed:

- README.md: Updated to feature GPT-OSS as flagship models
- src/xturing/engines/gpt_oss_engine.py: New engine implementations
- src/xturing/models/gpt_oss.py: New model classes
- src/xturing/config/*.yaml: Optimized configurations
- Updated model and engine registries
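The variant list above follows a regular naming scheme: each base model name, optionally suffixed with `_lora`, `_int8`, `_lora_int8`, or `_lora_kbit`. As a quick illustration (a stdlib-only sketch; `variant_names` is a hypothetical helper, not part of xTuring), the ten registry keys can be enumerated from the two base names:

```python
# Hypothetical sketch of the variant grid this commit registers.
# The names mirror the commit message; this helper is illustrative only.
BASES = ["gpt_oss_120b", "gpt_oss_20b"]

def variant_names(base: str) -> list:
    """Enumerate the five config names derived from one base model."""
    suffixes = ["", "_lora", "_int8", "_lora_int8", "_lora_kbit"]
    return [base + s for s in suffixes]

all_variants = [name for base in BASES for name in variant_names(base)]
print(len(all_variants))  # 10
```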
1 parent 3037ab0 commit 62d5367

File tree

8 files changed: +689 −9 lines changed

README.md (21 additions, 9 deletions)

@@ -20,7 +20,7 @@
 
 ___
 
-`xTuring` provides fast, efficient and simple fine-tuning of open-source LLMs, such as Mistral, LLaMA, GPT-J, and more.
+`xTuring` provides fast, efficient and simple fine-tuning of open-source LLMs, such as OpenAI's GPT-OSS, Mistral, LLaMA, GPT-J, and more.
 By providing an easy-to-use interface for fine-tuning LLMs to your own data and application, xTuring makes it
 simple to build, modify, and control LLMs. The entire process can be done inside your computer or in your
 private cloud, ensuring data privacy and security.
@@ -50,14 +50,14 @@ from xturing.models import BaseModel
 # Load the dataset
 instruction_dataset = InstructionDataset("./examples/models/llama/alpaca_data")
 
-# Initialize the model
-model = BaseModel.create("llama_lora")
+# Initialize the GPT-OSS 20B model with LoRA
+model = BaseModel.create("gpt_oss_20b_lora")
 
 # Finetune the model
 model.finetune(dataset=instruction_dataset)
 
-# Perform inference
-output = model.generate(texts=["Why LLM models are becoming so important?"])
+# Perform inference with reasoning capabilities
+output = model.generate(texts=["Explain quantum computing and its potential applications in cryptography"])
 
 print("Generated output by the model: {}".format(output))
 ```
@@ -68,7 +68,19 @@ You can find the data folder [here](examples/models/llama/alpaca_data).
 
 ## 🌟 What's new?
 We are excited to announce the latest enhancements to our `xTuring` library:
-1. __`LLaMA 2` integration__ - You can use and fine-tune the _`LLaMA 2`_ model in different configurations: _off-the-shelf_, _off-the-shelf with INT8 precision_, _LoRA fine-tuning_, _LoRA fine-tuning with INT8 precision_ and _LoRA fine-tuning with INT4 precision_ using the `GenericModel` wrapper and/or you can use the `Llama2` class from `xturing.models` to test and finetune the model.
+1. __`OpenAI GPT-OSS` integration__ - You can now use and fine-tune OpenAI's latest open-source models _`GPT-OSS-120B`_ and _`GPT-OSS-20B`_ in different configurations: _off-the-shelf_, _off-the-shelf with INT8 precision_, _LoRA fine-tuning_, _LoRA fine-tuning with INT8 precision_ and _LoRA fine-tuning with INT4 precision_. These models feature advanced reasoning capabilities with configurable reasoning levels (low/medium/high) and support OpenAI's harmony response format.
+```python
+from xturing.models import BaseModel
+
+# Use the production-ready 120B model
+model = BaseModel.create('gpt_oss_120b_lora')
+
+# Or use the efficient 20B model for faster inference
+model = BaseModel.create('gpt_oss_20b_lora')
+
+# Both models support reasoning levels via system prompts
+```
+2. __`LLaMA 2` integration__ - You can use and fine-tune the _`LLaMA 2`_ model in different configurations: _off-the-shelf_, _off-the-shelf with INT8 precision_, _LoRA fine-tuning_, _LoRA fine-tuning with INT8 precision_ and _LoRA fine-tuning with INT4 precision_ using the `GenericModel` wrapper and/or you can use the `Llama2` class from `xturing.models` to test and finetune the model.
 ```python
 from xturing.models import Llama2
 model = Llama2()
@@ -78,7 +90,7 @@ from xturing.models import BaseModel
 model = BaseModel.create('llama2')
 
 ```
-2. __`Evaluation`__ - Now you can evaluate any `Causal Language Model` on any dataset. The metrics currently supported is [`perplexity`](https://en.wikipedia.org/wiki/Perplexity).
+3. __`Evaluation`__ - Now you can evaluate any `Causal Language Model` on any dataset. The metrics currently supported is [`perplexity`](https://en.wikipedia.org/wiki/Perplexity).
 ```python
 # Make the necessary imports
 from xturing.datasets import InstructionDataset
@@ -87,8 +99,8 @@ from xturing.models import BaseModel
 # Load the desired dataset
 dataset = InstructionDataset('../llama/alpaca_data')
 
-# Load the desired model
-model = BaseModel.create('gpt2')
+# Load the desired model (try GPT-OSS for advanced reasoning)
+model = BaseModel.create('gpt_oss_20b')
 
 # Run the Evaluation of the model on the dataset
 result = model.evaluate(dataset)
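The evaluation example in this diff reports perplexity. As a refresher on the metric (a stdlib-only sketch, independent of xTuring), perplexity is the exponential of the mean per-token negative log-likelihood:

```python
import math

def perplexity(token_nlls: list) -> float:
    """Perplexity = exp(mean negative log-likelihood per token)."""
    return math.exp(sum(token_nlls) / len(token_nlls))

# A model that assigns every token probability 1/4 has per-token NLL ln(4),
# so its perplexity is 4: it is "as confused as" a uniform 4-way choice.
nlls = [math.log(4.0)] * 8
print(perplexity(nlls))  # ≈ 4.0
```

Lower is better; a model that predicted every token with probability 1 would score a perplexity of exactly 1.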

src/xturing/config/finetuning_config.yaml (92 additions)

@@ -326,3 +326,95 @@ opt_int8:
   num_train_epochs: 3
   batch_size: 8
   max_length: 256
+
+# GPT-OSS 120B model fine-tuning configurations
+gpt_oss_120b:
+  learning_rate: 1e-5
+  weight_decay: 0.01
+  num_train_epochs: 1
+  batch_size: 1
+  gradient_accumulation_steps: 8
+  max_length: 2048
+  warmup_steps: 100
+
+gpt_oss_120b_lora:
+  learning_rate: 2e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 2
+  gradient_accumulation_steps: 4
+  max_length: 2048
+  warmup_steps: 100
+
+gpt_oss_120b_int8:
+  learning_rate: 1e-4
+  weight_decay: 0.01
+  num_train_epochs: 2
+  batch_size: 2
+  gradient_accumulation_steps: 4
+  max_length: 2048
+  warmup_steps: 100
+
+gpt_oss_120b_lora_int8:
+  learning_rate: 2e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 4
+  gradient_accumulation_steps: 2
+  max_length: 2048
+  warmup_steps: 100
+
+gpt_oss_120b_lora_kbit:
+  learning_rate: 2e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 8
+  gradient_accumulation_steps: 1
+  max_length: 2048
+  warmup_steps: 100
+
+# GPT-OSS 20B model fine-tuning configurations
+gpt_oss_20b:
+  learning_rate: 5e-5
+  weight_decay: 0.01
+  num_train_epochs: 2
+  batch_size: 2
+  gradient_accumulation_steps: 4
+  max_length: 2048
+  warmup_steps: 100
+
+gpt_oss_20b_lora:
+  learning_rate: 3e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 4
+  gradient_accumulation_steps: 2
+  max_length: 2048
+  warmup_steps: 100
+
+gpt_oss_20b_int8:
+  learning_rate: 2e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 4
+  gradient_accumulation_steps: 2
+  max_length: 2048
+  warmup_steps: 100
+
+gpt_oss_20b_lora_int8:
+  learning_rate: 3e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 8
+  gradient_accumulation_steps: 1
+  max_length: 2048
+  warmup_steps: 100
+
+gpt_oss_20b_lora_kbit:
+  learning_rate: 3e-4
+  weight_decay: 0.01
+  num_train_epochs: 3
+  batch_size: 16
+  gradient_accumulation_steps: 1
+  max_length: 2048
+  warmup_steps: 100
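Notice how these configs trade per-device `batch_size` against `gradient_accumulation_steps`: their product is the effective batch size the optimizer sees per update. A stdlib-only sketch (values copied from the YAML above, a few representative variants only):

```python
# Effective batch size = batch_size * gradient_accumulation_steps.
# Values copied from the fine-tuning YAML above.
configs = {
    "gpt_oss_120b":          {"batch_size": 1,  "gradient_accumulation_steps": 8},
    "gpt_oss_120b_lora":     {"batch_size": 2,  "gradient_accumulation_steps": 4},
    "gpt_oss_120b_lora_kbit": {"batch_size": 8,  "gradient_accumulation_steps": 1},
    "gpt_oss_20b_lora_kbit": {"batch_size": 16, "gradient_accumulation_steps": 1},
}

def effective_batch_size(cfg: dict) -> int:
    return cfg["batch_size"] * cfg["gradient_accumulation_steps"]

for name, cfg in configs.items():
    print(name, effective_batch_size(cfg))
# Every 120B variant lands on an effective batch of 8, regardless of how
# much fits on the device; the 20B 4-bit variant doubles it to 16.
```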

src/xturing/config/generation_config.yaml (60 additions)

@@ -194,6 +194,66 @@ gpt2_int8:
   top_p: 0.92
   max_new_tokens: 256
 
+# Contrastive search for GPT-OSS models (high reasoning capability)
+gpt_oss_120b:
+  penalty_alpha: 0.6
+  top_k: 4
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
+gpt_oss_120b_lora:
+  penalty_alpha: 0.6
+  top_k: 4
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
+gpt_oss_120b_int8:
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
+gpt_oss_120b_lora_int8:
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
+gpt_oss_120b_lora_kbit:
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
+# Contrastive search for GPT-OSS 20B models
+gpt_oss_20b:
+  penalty_alpha: 0.6
+  top_k: 4
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
+gpt_oss_20b_lora:
+  penalty_alpha: 0.6
+  top_k: 4
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
+gpt_oss_20b_int8:
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
+gpt_oss_20b_lora_int8:
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
+gpt_oss_20b_lora_kbit:
+  max_new_tokens: 512
+  do_sample: false
+  temperature: 0.1
+
 # Contrastive search
 llama:
   penalty_alpha: 0.6
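Setting `penalty_alpha` together with `top_k` selects contrastive search decoding. As an illustration of the scoring rule behind those two knobs (a toy stdlib sketch with made-up probabilities and hidden-state vectors, not the library's implementation), each of the `top_k` candidate tokens is scored by model confidence minus a degeneration penalty:

```python
# Contrastive search scores candidate v as:
#   (1 - alpha) * p(v) - alpha * max cosine-similarity(v, prior context states)
# so confident-but-repetitive tokens lose to slightly less likely, novel ones.
def cosine(u, v):
    dot = sum(a * b for a, b in zip(u, v))
    nu = sum(a * a for a in u) ** 0.5
    nv = sum(b * b for b in v) ** 0.5
    return dot / (nu * nv)

def contrastive_score(prob, cand_vec, context_vecs, alpha=0.6):
    degeneration_penalty = max(cosine(cand_vec, h) for h in context_vecs)
    return (1 - alpha) * prob - alpha * degeneration_penalty

context = [[1.0, 0.0], [0.8, 0.6]]  # made-up prior hidden states
repetitive = contrastive_score(0.9, [1.0, 0.0], context)  # likely, but a repeat
novel = contrastive_score(0.5, [0.0, 1.0], context)       # less likely, fresh
print(repetitive < novel)  # True
```

With `alpha=0.6` (as in the YAML) the penalty dominates, which is why these configs pair it with a small `top_k` of 4.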

src/xturing/engines/__init__.py (26 additions)

@@ -38,6 +38,18 @@
     GPT2LoraEngine,
     GPT2LoraInt8Engine,
 )
+from xturing.engines.gpt_oss_engine import (
+    GPTOSS20BEngine,
+    GPTOSS20BInt8Engine,
+    GPTOSS20BLoraEngine,
+    GPTOSS20BLoraInt8Engine,
+    GPTOSS20BLoraKbitEngine,
+    GPTOSS120BEngine,
+    GPTOSS120BInt8Engine,
+    GPTOSS120BLoraEngine,
+    GPTOSS120BLoraInt8Engine,
+    GPTOSS120BLoraKbitEngine,
+)
 from xturing.engines.gptj_engine import (
     GPTJEngine,
     GPTJInt8Engine,
@@ -98,6 +110,20 @@
 BaseEngine.add_to_registry(GPT2Int8Engine.config_name, GPT2Int8Engine)
 BaseEngine.add_to_registry(GPT2LoraEngine.config_name, GPT2LoraEngine)
 BaseEngine.add_to_registry(GPT2LoraInt8Engine.config_name, GPT2LoraInt8Engine)
+BaseEngine.add_to_registry(GPTOSS120BEngine.config_name, GPTOSS120BEngine)
+BaseEngine.add_to_registry(GPTOSS120BInt8Engine.config_name, GPTOSS120BInt8Engine)
+BaseEngine.add_to_registry(GPTOSS120BLoraEngine.config_name, GPTOSS120BLoraEngine)
+BaseEngine.add_to_registry(
+    GPTOSS120BLoraInt8Engine.config_name, GPTOSS120BLoraInt8Engine
+)
+BaseEngine.add_to_registry(
+    GPTOSS120BLoraKbitEngine.config_name, GPTOSS120BLoraKbitEngine
+)
+BaseEngine.add_to_registry(GPTOSS20BEngine.config_name, GPTOSS20BEngine)
+BaseEngine.add_to_registry(GPTOSS20BInt8Engine.config_name, GPTOSS20BInt8Engine)
+BaseEngine.add_to_registry(GPTOSS20BLoraEngine.config_name, GPTOSS20BLoraEngine)
+BaseEngine.add_to_registry(GPTOSS20BLoraInt8Engine.config_name, GPTOSS20BLoraInt8Engine)
+BaseEngine.add_to_registry(GPTOSS20BLoraKbitEngine.config_name, GPTOSS20BLoraKbitEngine)
 BaseEngine.add_to_registry(LLamaEngine.config_name, LLamaEngine)
 BaseEngine.add_to_registry(LLamaInt8Engine.config_name, LLamaInt8Engine)
 BaseEngine.add_to_registry(LlamaLoraEngine.config_name, LlamaLoraEngine)
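The `add_to_registry` calls above follow a plain string-keyed registry pattern: each engine class carries a `config_name`, and a factory later looks the class up by that key. A hypothetical minimal version (class names borrowed from the diff; the real `BaseEngine` has more machinery):

```python
# Minimal sketch of the config_name -> class registry pattern in the diff.
class BaseEngine:
    registry = {}  # shared mapping from config_name to engine class

    @classmethod
    def add_to_registry(cls, name, engine_cls):
        cls.registry[name] = engine_cls

    @classmethod
    def create(cls, name):
        """Instantiate the engine registered under `name`."""
        return cls.registry[name]()

class GPTOSS20BLoraEngine(BaseEngine):
    config_name = "gpt_oss_20b_lora"

BaseEngine.add_to_registry(GPTOSS20BLoraEngine.config_name, GPTOSS20BLoraEngine)
engine = BaseEngine.create("gpt_oss_20b_lora")
print(type(engine).__name__)  # GPTOSS20BLoraEngine
```

Registering under `config_name` rather than the class name is what lets the same key tie together an engine, its fine-tuning config, and its generation config across the YAML files in this commit.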
